In [1]:
import pickle as pickle
import os
import pandas as pd
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
import random
from itertools import chain
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import copy
import csv
import json
import logging
import os
import torch.nn as nn
from tqdm.auto import tqdm
from transformers import AdamW, get_linear_schedule_with_warmup
import torch.nn.functional as F

from transformers import BartModel, BartConfig
from transformers import ElectraModel, ElectraConfig, ElectraPreTrainedModel, ElectraTokenizer
from transformers import AutoTokenizer,AutoModel, RobertaPreTrainedModel, AutoConfig


In [2]:
random_seed=42
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

In [3]:
# Dataset 구성.
class RE_Dataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_dataset, labels):
        self.tokenized_dataset = tokenized_dataset
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.tokenized_dataset.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        return len(self.labels)

# tsv 파일을 불러옵니다.
def load_data(dataset_dir):
    dataset = pd.read_csv(dataset_dir, delimiter='\t')
    li = []
    for s1, s2 in zip(list(dataset['SENTENCE1']), list(dataset['SENTENCE2'])):
        li.append(s1+' '+s2)
    #dataset['sentence'] = li
    dataset["ANSWER"] = dataset["ANSWER"].astype(int)
    return dataset

In [4]:
def compute_metrics(preds, labels):
    assert len(preds) == len(labels)
    return acc_and_f1(preds, labels)


def simple_accuracy(preds, labels):
    return (preds == labels).mean()


def official_f1():

    with open(os.path.join('/opt/ml/eval/result.txt'), "r", encoding="utf-8") as f:
        macro_result = list(f)[-1]
        macro_result = macro_result.split(":")[1].replace(">>>", "").strip()
        macro_result = macro_result.split("=")[1].strip().replace("%", "")
        macro_result = float(macro_result) / 100

    return macro_result

def acc_and_f1(preds, labels, average="macro"):
    acc = simple_accuracy(preds, labels)
    return {
        "acc": acc,
        #"f1": official_f1(),
    }

In [5]:
def convert_sentence_to_features(train_dataset, tokenizer, max_len, mode='train'):
    
    max_seq_len=max_len
    #cls_token=tokenizer.cls_token
    #sep_token=tokenizer.sep_token
    pad_token=tokenizer.pad_token_id
    add_sep_token=False
    mask_padding_with_zero=True
    
    all_input_ids = []
    all_attention_mask = []
    all_e1_mask=[]
    all_e2_mask=[]
    all_label=[]
    m_len=0
    for idx in tqdm(range(len(train_dataset))):
        sentence = '<''s''>' + train_dataset['SENTENCE1'][idx][:train_dataset['start_s1'][idx]] \
            + ' <e1> ' + train_dataset['SENTENCE1'][idx][train_dataset['start_s1'][idx]:train_dataset['end_s1'][idx]] \
            + ' </e1> ' + train_dataset['SENTENCE1'][idx][train_dataset['end_s1'][idx]:] + '</s>' \
            + ' ' \
            + '<s>' + train_dataset['SENTENCE2'][idx][:train_dataset['start_s2'][idx]] \
            + ' <e2> ' + train_dataset['SENTENCE2'][idx][train_dataset['start_s2'][idx]:train_dataset['end_s2'][idx]] \
            + ' </e2> ' + train_dataset['SENTENCE2'][idx][train_dataset['end_s2'][idx]:] + '</s>'
        #print(sentence)
        
        token = tokenizer.tokenize(sentence)
        m_len = max(m_len, len(token))
        e11_p = token.index("<e1>")  # the start position of entity1
        e12_p = token.index("</e1>")  # the end position of entity1
        e21_p = token.index("<e2>")  # the start position of entity2
        e22_p = token.index("</e2>")  # the end position of entity2

        token[e11_p] = "$"
        token[e12_p] = "$"
        token[e21_p] = "#"
        token[e22_p] = "#"

        #print(token)

        e11_p += 1
        e12_p += 1
        e21_p += 1
        e22_p += 1

        special_tokens_count = 1

        if len(token) < max_seq_len - special_tokens_count:
            input_ids = tokenizer.convert_tokens_to_ids(token)
            attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

            padding_length = max_seq_len - len(input_ids)
            input_ids = input_ids + ([pad_token] * padding_length)
            attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length)

            e1_mask = [0] * len(attention_mask)
            e2_mask = [0] * len(attention_mask)

            for i in range(e11_p, e12_p + 1):
                e1_mask[i] = 1
            for i in range(e21_p, e22_p + 1):
                e2_mask[i] = 1

            assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(len(input_ids), max_seq_len)
            assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(
                len(attention_mask), max_seq_len
            )

            all_input_ids.append(input_ids)
            all_attention_mask.append(attention_mask)
            all_e1_mask.append(e1_mask)
            all_e2_mask.append(e2_mask)
            all_label.append(train_dataset['ANSWER'][idx])
    print(m_len)

    all_features = {
        'input_ids' : torch.tensor(all_input_ids),
        'attention_mask' : torch.tensor(all_attention_mask),
        'e1_mask' : torch.tensor(all_e1_mask),
        'e2_mask' : torch.tensor(all_e2_mask)
    }  
    return RE_Dataset(all_features, all_label)

In [6]:
def reduce_loss(loss, reduction='mean'):
    return loss.mean() if reduction=='mean' else loss.sum() if reduction=='sum' else loss

# Implementation from fastai https://github.com/fastai/fastai2/blob/master/fastai2/layers.py#L338
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, e:float=0.05, reduction='mean'):
        super().__init__()
        self.e,self.reduction = e,reduction
    
    def forward(self, output, target):
        # number of classes
        c = output.size()[-1]
        log_preds = F.log_softmax(output, dim=-1)
        loss = reduce_loss(-log_preds.sum(dim=-1), self.reduction)
        nll = F.nll_loss(log_preds, target, reduction=self.reduction)
        # (1-ε)* H(q,p) + ε*H(u,p)
        return (1-self.e)*nll + self.e*(loss/c)

In [7]:
class FCLayer(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_rate=0.0, use_activation=True):
        super(FCLayer, self).__init__()
        self.use_activation = use_activation
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        x = self.dropout(x)
        if self.use_activation:
            x = self.tanh(x)
        return self.linear(x)


class Roberta_WiC(RobertaPreTrainedModel):
    def __init__(self,  model_name, config, dropout_rate):
        super(Roberta_WiC, self).__init__(config)
        self.model = AutoModel.from_pretrained(model_name, config=config)  # Load pretrained XLMRoberta

        self.num_labels = config.num_labels

        #self.cls_fc_layer = FCLayer(config.hidden_size, config.hidden_size, dropout_rate)
        self.eos_fc_layer = FCLayer(config.hidden_size, config.hidden_size, dropout_rate)
        self.entity_fc_layer1 = FCLayer(config.hidden_size, config.hidden_size, dropout_rate)
        self.entity_fc_layer2 = FCLayer(config.hidden_size, config.hidden_size, dropout_rate)

        self.label_classifier = FCLayer(
            config.hidden_size * 3,
            config.num_labels,
            dropout_rate,
            use_activation=False,
        )

    @staticmethod
    def entity_average(hidden_output, e_mask):
        """
        Average the entity hidden state vectors (H_i ~ H_j)
        :param hidden_output: [batch_size, j-i+1, dim]
        :param e_mask: [batch_size, max_seq_len]
                e.g. e_mask[0] == [0, 0, 0, 1, 1, 1, 0, 0, ... 0]
        :return: [batch_size, dim]
        """
        e_mask_unsqueeze = e_mask.unsqueeze(1)  # [b, 1, j-i+1]
        length_tensor = (e_mask != 0).sum(dim=1).unsqueeze(1)  # [batch_size, 1]

        # [b, 1, j-i+1] * [b, j-i+1, dim] = [b, 1, dim] -> [b, dim]
        sum_vector = torch.bmm(e_mask_unsqueeze.float(), hidden_output).squeeze(1)
        avg_vector = sum_vector.float() / length_tensor.float()  # broadcasting
        return avg_vector

    def forward(self, input_ids, attention_mask, labels, e1_mask, e2_mask):
        outputs = self.model(
            input_ids, attention_mask=attention_mask
        )  # sequence_output, pooled_output, (hidden_states), (attentions)
        sequence_output = outputs[0] #batch, max_len, hidden_size 16, 280, 768 

        # cls_mask = input_ids.eq(0) # cls
        # sentence_representation_cls = sequence_output[cls_mask, :].view(sequence_output.size(0), -1, sequence_output.size(-1))[:,-1,:]
    

        # eos_mask = input_ids.eq(1) # eos token id = 1
        
        # sentence_representation = sequence_output[eos_mask, :].view(sequence_output.size(0), -1, sequence_output.size(-1))[:,-1,:]
    
        e1_h = self.entity_average(sequence_output, e1_mask)
        e2_h = self.entity_average(sequence_output, e2_mask)
        # Dropout -> tanh -> fc_layer (Share FC layer for e1 and e2)
        #sentence_representation_cls = self.cls_fc_layer(sentence_representation_cls)
        sentence_representation = self.eos_fc_layer(outputs.pooler_output)
#         sentence_representation = self.eos_fc_layer(sequence_output[:,0,:])
        e1_h = self.entity_fc_layer1(e1_h)
        e2_h = self.entity_fc_layer2(e2_h)
        # Concat -> fc_layer
        #concat_h = torch.cat([pooled_output, e1_h, e2_h, torch.abs(torch.sub(e1_h,e2_h))], dim=-1)
        concat_h = torch.cat([sentence_representation, e1_h, e2_h], dim=-1)
        logits = self.label_classifier(concat_h)
        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here
        # Softmax
        if labels is not None:
            if self.num_labels == 1:
                loss_fct = nn.MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = nn.CrossEntropyLoss()
                #loss_fct = nn.BCEWithLogitsLoss()
                #loss_fct = LabelSmoothingCrossEntropy()
                #loss_fct = Cross_FocalLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)

In [8]:
class Trainer(object):
    def __init__(self,num_labels,logging_steps, save_steps,max_steps,
                 num_train_epochs,warmup_steps,adam_epsilon,learning_rate,gradient_accumulation_steps,
                 max_grad_norm, eval_batch_size, train_batch_size, model_dir, dropout_rate, classifier_epoch,tokenizer,
                 weight_decay,train_dataset=None, dev_dataset=None, test_dataset=None):
        #self.args = args
        self.train_dataset = train_dataset
        self.eval_batch_size = eval_batch_size
        self.train_batch_size = train_batch_size
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        #self.Model_name = Model_name
        #self.label_lst = label_dict
        self.num_labels = num_labels
        self.max_steps = max_steps
        self.weight_decay = weight_decay
        self.learning_rate = learning_rate
        self.adam_epsilon=adam_epsilon
        self.warmup_steps = warmup_steps
        self.num_train_epochs = num_train_epochs
        self.logging_steps = logging_steps
        self.save_steps = save_steps
        self.max_grad_norm = max_grad_norm
        self.model_dir = model_dir
        self.dropout_rate = dropout_rate
        self.classifier_epoch=classifier_epoch
        self.gradient_accumulation_steps = gradient_accumulation_steps
        self.global_epo=0
        self.config = AutoConfig.from_pretrained(
            "klue/roberta-large",
            num_labels=self.num_labels
        )
        self.model = Roberta_WiC(
           "klue/roberta-large", 
            config=self.config, 
            dropout_rate = self.dropout_rate,
        )

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.tokenizer = tokenizer
        self.best_score = 0
        self.hold_epoch = 0
#         self.model.resize_token_embeddings(len(self.tokenizer))
        
    def train(self):
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(
            self.train_dataset,
            sampler=train_sampler,
            batch_size=self.train_batch_size,
        )

        if self.max_steps > 0:
            t_total = self.max_steps
            self.num_train_epochs = (
                self.max_steps // (len(train_dataloader) // self.gradient_accumulation_steps) + 1
            )
        else:
            t_total = len(train_dataloader) // self.gradient_accumulation_steps * self.num_train_epochs

        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.learning_rate,
            eps=self.adam_epsilon,
        )
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.warmup_steps,
            num_training_steps=t_total,
        )
        
        #scaler = torch.cuda.amp.GradScaler()
        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.num_train_epochs)
        logger.info("  Total train batch size = %d", self.train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)
        logger.info("  Logging steps = %d", self.logging_steps)
        logger.info("  Save steps = %d", self.save_steps)

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()

        train_iterator = tqdm(range(int(self.num_train_epochs)), desc="Epoch")

        for epo_step in train_iterator:
            self.global_epo = epo_step
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(epoch_iterator):
                self.model.train()
                batch = tuple(batch[t].to(self.device) for t in batch)  # GPU or CPU
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "labels": batch[4],
                    "e1_mask": batch[2],
                    "e2_mask": batch[3]
                }
                #with torch.cuda.amp.autocast():
                outputs = self.model(**inputs)
                loss = outputs[0]

                if self.gradient_accumulation_steps > 1:
                    loss = loss / self.gradient_accumulation_steps

                #scaler.scale(loss).backward()
                loss.backward()

                tr_loss += loss.item()
                if (step + 1) % self.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)

                    optimizer.step()
                    #scaler.step(optimizer)
                    #scaler.update()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                if self.logging_steps > 0 and global_step % self.logging_steps == 0:
                    logger.info("  global steps = %d", global_step)
#                     self.evaluate("train")  # There is no dev set for semeval task
#                 if self.save_steps > 0 and global_step % self.save_steps == 0:
#                     self.evaluate("dev")
#                     self.save_model()

                if 0 < self.max_steps < global_step:
                    epoch_iterator.close()
                    break
            # epoch 마다
            self.evaluate("dev")
            if self.hold_epoch > 4:
                train_iterator.close()
                break
                
            if 0 < self.max_steps < global_step:
                train_iterator.close()
                break
          

        return global_step, tr_loss / global_step
    
   
    def evaluate(self, mode):
        # We use test dataset because semeval doesn't have dev dataset
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        elif mode == "train":
            dataset = self.train_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.eval_batch_size)

        # Eval!
        logger.info('---------------------------------------------------')
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(batch[t].to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "labels": batch[4],
                    "e1_mask": batch[2],
                    "e2_mask": batch[3],
                }
                #with torch.cuda.amp.autocast():
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        # preds = np.around(preds)
        # preds = preds.astype(int)
        result = compute_metrics(preds, out_label_ids)
        
        if mode == "dev":
            if result['acc']>self.best_score:
                self.save_model()
                self.best_score = result['acc']
                print('save new best model acc : ',str(self.best_score))
                self.hold_epoch = 0
            else:
                self.hold_epoch += 1
        
        
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {} = {:.4f}".format(key, results[key]))
            # if key == 'acc':
            #     if results[key] > 0.85:
            #         self.test_pred()
        logger.info("---------------------------------------------------")
        return results
    
    def test_pred(self):
        # train_Dataset
        # eval_batch_size = 16
        # self.model = 
        test_dataset = self.test_dataset
        test_sampler = SequentialSampler(test_dataset)
        test_dataloader = DataLoader(test_dataset, sampler=test_sampler,batch_size=self.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", "test")
        #logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.eval_batch_size)

        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(test_dataloader, desc="Predicting"):
            batch = tuple(batch[t].to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "labels": None,
                    "e1_mask": batch[2],
                    "e2_mask": batch[3],
                }
                outputs = self.model(**inputs)
                #print(outputs)
                pred = outputs[0]

            nb_eval_steps += 1

            if preds is None:
                preds = pred.detach().cpu().numpy()
                #out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, pred.detach().cpu().numpy(), axis=0)
                #out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

        # preds = np.around(preds)
        preds_label = np.argmax(preds, axis=1)
        df = pd.DataFrame(preds, columns=['pred_0','pred_1'])
        df['label'] = preds_label
        preds = preds.astype(int)
        return df
        # df.to_csv('klueRoberta_layersplit_with_focalcross_epoch'+str(self.global_epo)+'.csv', index=False)
#         with open("proposed_answers.txt", "w", encoding="utf-8") as f:
#             for idx, pred in enumerate(preds):
#                 f.write("{}\n".format(pred))
        #write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)
    

    def save_model(self,new_dir=None):
        # Save model checkpoint (Overwrite)
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        if new_dir == None:
            pass
        else:
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            self.model_dir = new_dir
        model_to_save = self.model.module if hasattr(self.model, "module") else self.model
        model_to_save.save_pretrained(self.model_dir)

        # Save training arguments together with the trained model
        #torch.save(self.args, os.path.join(self.args.model_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", self.model_dir)

    def load_model(self):
        # Check whether model exists
        if not os.path.exists(self.model_dir):
            raise Exception("Model doesn't exists! Train first!")

        #self.args = torch.load(os.path.join(self.args.model_dir, "training_args.bin"))
        self.model = AutoModel.from_pretrained(self.model_dir)
        self.model.to(self.device)
        logger.info("***** Model Loaded *****")

In [9]:
def init_logger():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

In [10]:
logger = logging.getLogger(__name__)
init_logger()
#train_dataset = load_data("/opt/ml/input/data/train/train+all.tsv")
train_dataset = load_data("./WIC/Data/NIKL_SKT_WiC_Train.tsv")

#test_dataset = load_data("/content/drive/MyDrive/NIKL/NIKL_WiC/Data/NIKL_SKT_WiC_Dev.tsv")
dev_dataset = load_data("./WIC/Data/NIKL_SKT_WiC_Dev.tsv")
#train_label = train_dataset['label'].values
#train_dataset.columns= ['link','sentence' 'entity_01','e1s','e1e','entity_02','e2s','e2e','label']
# first_sent_start = 's1'
# first_sent_end = 'e1'
# second_sent_start='s2'
# second_sent_end = 'e2'
ADDITIONAL_SPECIAL_TOKENS = ["<e1>", "</e1>", "<e2>", "</e2>"]
#MODEL_NAME = "xlm-roXLMRobertaa-large"
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large", return_token_type_ids=False)
tokenizer.add_special_tokens({"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS})

4

In [11]:
dev_dataset['ID'] = dev_dataset['ID']+ 7748

In [12]:
conact_dataset = pd.concat([train_dataset,dev_dataset])
conact_dataset.shape

(8914, 9)

In [13]:
def make_fold(x):
    if x <= conact_dataset.shape[0]*0.2:
        return 0
    elif x > conact_dataset.shape[0]*0.2 and x <= conact_dataset.shape[0]*0.4:
        return 1
    elif x > conact_dataset.shape[0]*0.4 and x <= conact_dataset.shape[0]*0.6 :
        return 2
    elif x > conact_dataset.shape[0]*0.6 and x <= conact_dataset.shape[0]*0.8 :
        return 3
    else:
        return 4
    

In [14]:
conact_dataset['fold']= conact_dataset['ID'].apply(make_fold)
conact_dataset = conact_dataset.drop(['ID', 'Target'],axis=1)

In [15]:
def load_test_data(dataset_dir):
    dataset = pd.read_csv(dataset_dir, delimiter='\t')
    li = []
    for s1, s2 in zip(list(dataset['SENTENCE1']), list(dataset['SENTENCE2'])):
        li.append(s1+' '+s2)
    #dataset['sentence'] = li
    dataset["ANSWER"] = [0] * len(dataset)
    return dataset
test_dataset = load_test_data("./WIC/Data/NIKL_SKT_WiC_Test.tsv")
test_Dataset = convert_sentence_to_features(test_dataset, tokenizer, max_len= 280, mode='eval')

  0%|          | 0/1246 [00:00<?, ?it/s]

246


In [16]:
trn_idx = conact_dataset[conact_dataset['fold'] != 0].index

sample = trn_idx[0:len(trn_idx)//2]

sample.append(trn_idx[0:len(trn_idx)//2])

Int64Index([1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791,
            ...
            5338, 5339, 5340, 5341, 5342, 5343, 5344, 5345, 5346, 5347],
           dtype='int64', length=7132)

In [17]:
for fold in tqdm(range(5)): 
    init_logger()
    
    trn_idx = conact_dataset[conact_dataset['fold'] != fold].index
    val_idx = conact_dataset[conact_dataset['fold'] == fold].index
    
    half_val_len = len(val_idx)//2
    add_trn_idx = val_idx[:half_val_len]
    
    trn_idx.append(add_trn_idx)
    val_idx = val_idx[half_val_len:]

    train_folds = conact_dataset.loc[trn_idx].reset_index(drop=True).drop(['fold'],axis=1)
    valid_folds = conact_dataset.loc[val_idx].reset_index(drop=True).drop(['fold'],axis=1)
    
    train_Dataset = convert_sentence_to_features(train_dataset, tokenizer, max_len = 280, mode='train')
    valid_Dataset = convert_sentence_to_features(dev_dataset, tokenizer, max_len= 280, mode='eval')

    trainer = Trainer(eval_batch_size=16,train_batch_size=16, num_labels = 2,
                    max_steps=-1, weight_decay=1e-2, learning_rate= 1e-5, 
                    adam_epsilon=1e-8, warmup_steps=64, num_train_epochs=10,
                    logging_steps=1292, save_steps=2584, max_grad_norm=1.0, 
                    model_dir='./final_klue_roberta_model_'+str(fold),
                    gradient_accumulation_steps=2,dropout_rate = 0.1, 
                    classifier_epoch=3,
                    train_dataset=train_Dataset,
                    dev_dataset=valid_Dataset,
                    test_dataset=test_Dataset,\
                     tokenizer =tokenizer)
    trainer.train()
    trainer.save_model(new_dir='./rbt_model'+)
    print('Save last model')
    result = trainer.test_pred()
    result.to_csv(str(fold)+'_final_klue_roberta_model_result.csv', index=False)
    submission_json = {"wic" : []}
    for i, pred in enumerate(result['label'],1):
        if pred == 1:
            submission_json["wic"].append({"idx" : i, "label" : 'true'})
        else:
            submission_json["wic"].append({"idx" : i, "label" : 'false'})
        
    with open(str(fold)+'_klue_roberta_submission.json', 'w') as fp:
        json.dump(submission_json, fp)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7748 [00:00<?, ?it/s]

228


  0%|          | 0/1166 [00:00<?, ?it/s]

200


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.tokenized_dataset.items()}
11/08/2021 16:00:31 - INFO - __main__ -     global steps = 0
11/08/2021 16:07:51 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 16:07:51 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 16:07:51 - INFO - __main__ -     Num examples = 1166
11/08/2021 16:07:51 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 16:08:15 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_0
11/08/2021 16:08:15 - INFO - __main__ -   ***** Eval results *****
11/08/2021 16:08:15 - INFO - __main__ -     acc = 0.8516
11/08/2021 16:08:15 - INFO - __main__ -     loss = 0.3548
11/08/2021 16:08:15 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8516295025728988


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 16:15:38 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 16:15:38 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 16:15:38 - INFO - __main__ -     Num examples = 1166
11/08/2021 16:15:38 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 16:16:02 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_0
11/08/2021 16:16:02 - INFO - __main__ -   ***** Eval results *****
11/08/2021 16:16:02 - INFO - __main__ -     acc = 0.8791
11/08/2021 16:16:02 - INFO - __main__ -     loss = 0.3328
11/08/2021 16:16:02 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8790737564322469


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 16:23:24 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 16:23:24 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 16:23:24 - INFO - __main__ -     Num examples = 1166
11/08/2021 16:23:24 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 16:23:48 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_0
11/08/2021 16:23:48 - INFO - __main__ -   ***** Eval results *****
11/08/2021 16:23:48 - INFO - __main__ -     acc = 0.8962
11/08/2021 16:23:48 - INFO - __main__ -     loss = 0.4184
11/08/2021 16:23:48 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8962264150943396


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 16:31:10 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 16:31:10 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 16:31:10 - INFO - __main__ -     Num examples = 1166
11/08/2021 16:31:10 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 16:31:32 - INFO - __main__ -   ***** Eval results *****
11/08/2021 16:31:32 - INFO - __main__ -     acc = 0.8748
11/08/2021 16:31:32 - INFO - __main__ -     loss = 0.5632
11/08/2021 16:31:32 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 16:38:53 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 16:38:53 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 16:38:53 - INFO - __main__ -     Num examples = 1166
11/08/2021 16:38:53 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 16:39:15 - INFO - __main__ -   ***** Eval results *****
11/08/2021 16:39:15 - INFO - __main__ -     acc = 0.8962
11/08/2021 16:39:15 - INFO - __main__ -     loss = 0.6435
11/08/2021 16:39:15 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 16:41:45 - INFO - __main__ -     global steps = 1292
11/08/2021 16:41:45 - INFO - __main__ -     global steps = 1292
11/08/2021 16:46:36 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 16:46:36 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 16:46:36 - INFO - __main__ -     Num examples = 1166
11/08/2021 16:46:36 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 16:47:02 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_0
11/08/2021 16:47:02 - INFO - __main__ -   ***** Eval results *****
11/08/2021 16:47:02 - INFO - __main__ -     acc = 0.9065
11/08/2021 16:47:02 - INFO - __main__ -     loss = 0.6185
11/08/2021 16:47:02 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9065180102915952


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 16:54:23 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 16:54:23 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 16:54:23 - INFO - __main__ -     Num examples = 1166
11/08/2021 16:54:23 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 16:54:47 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_0
11/08/2021 16:54:47 - INFO - __main__ -   ***** Eval results *****
11/08/2021 16:54:47 - INFO - __main__ -     acc = 0.9108
11/08/2021 16:54:47 - INFO - __main__ -     loss = 0.6026
11/08/2021 16:54:47 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9108061749571184


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:02:07 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:02:07 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:02:07 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:02:07 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:02:31 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_0
11/08/2021 17:02:31 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:02:31 - INFO - __main__ -     acc = 0.9168
11/08/2021 17:02:31 - INFO - __main__ -     loss = 0.6405
11/08/2021 17:02:31 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9168096054888508


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:09:52 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:09:52 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:09:52 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:09:52 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:10:13 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:10:13 - INFO - __main__ -     acc = 0.9142
11/08/2021 17:10:13 - INFO - __main__ -     loss = 0.6882
11/08/2021 17:10:13 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:17:34 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:17:34 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:17:34 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:17:34 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:17:56 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:17:56 - INFO - __main__ -     acc = 0.9151
11/08/2021 17:17:56 - INFO - __main__ -     loss = 0.6966
11/08/2021 17:17:56 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:17:58 - INFO - __main__ -   Saving model checkpoint to ./rbt_model0
11/08/2021 17:17:58 - INFO - __main__ -   ***** Running evaluation on test dataset *****
11/08/2021 17:17:58 - INFO - __main__ -     Batch size = 16


Save last model


Predicting:   0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/7748 [00:00<?, ?it/s]

228


  0%|          | 0/1166 [00:00<?, ?it/s]

200


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:18:34 - INFO - __main__ -     global steps = 0
11/08/2021 17:25:55 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:25:55 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:25:55 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:25:55 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:26:19 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_1
11/08/2021 17:26:19 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:26:19 - INFO - __main__ -     acc = 0.8293
11/08/2021 17:26:19 - INFO - __main__ -     loss = 0.4076
11/08/2021 17:26:19 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8293310463121784


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:33:41 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:33:41 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:33:41 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:33:41 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:34:06 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_1
11/08/2021 17:34:06 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:34:06 - INFO - __main__ -     acc = 0.9074
11/08/2021 17:34:06 - INFO - __main__ -     loss = 0.2637
11/08/2021 17:34:06 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9073756432246999


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:41:28 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:41:28 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:41:28 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:41:28 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:41:50 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:41:50 - INFO - __main__ -     acc = 0.9014
11/08/2021 17:41:50 - INFO - __main__ -     loss = 0.3822
11/08/2021 17:41:50 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:49:11 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:49:11 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:49:11 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:49:11 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:49:33 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:49:33 - INFO - __main__ -     acc = 0.9048
11/08/2021 17:49:33 - INFO - __main__ -     loss = 0.4517
11/08/2021 17:49:33 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:56:55 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 17:56:55 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 17:56:55 - INFO - __main__ -     Num examples = 1166
11/08/2021 17:56:55 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 17:57:20 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_1
11/08/2021 17:57:20 - INFO - __main__ -   ***** Eval results *****
11/08/2021 17:57:20 - INFO - __main__ -     acc = 0.9091
11/08/2021 17:57:20 - INFO - __main__ -     loss = 0.5359
11/08/2021 17:57:20 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9090909090909091


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 17:59:49 - INFO - __main__ -     global steps = 1292
11/08/2021 17:59:50 - INFO - __main__ -     global steps = 1292
11/08/2021 18:04:41 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:04:41 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:04:41 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:04:41 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 18:05:07 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_1
11/08/2021 18:05:07 - INFO - __main__ -   ***** Eval results *****
11/08/2021 18:05:07 - INFO - __main__ -     acc = 0.9108
11/08/2021 18:05:07 - INFO - __main__ -     loss = 0.5826
11/08/2021 18:05:07 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9108061749571184


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 18:12:28 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:12:28 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:12:28 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:12:28 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 18:12:50 - INFO - __main__ -   ***** Eval results *****
11/08/2021 18:12:50 - INFO - __main__ -     acc = 0.9091
11/08/2021 18:12:50 - INFO - __main__ -     loss = 0.6127
11/08/2021 18:12:50 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 18:20:10 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:20:10 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:20:10 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:20:10 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 18:20:35 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_1
11/08/2021 18:20:35 - INFO - __main__ -   ***** Eval results *****
11/08/2021 18:20:35 - INFO - __main__ -     acc = 0.9177
11/08/2021 18:20:35 - INFO - __main__ -     loss = 0.6221
11/08/2021 18:20:35 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9176672384219554


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 18:27:55 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:27:55 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:27:55 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:27:55 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 18:28:21 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_1
11/08/2021 18:28:21 - INFO - __main__ -   ***** Eval results *****
11/08/2021 18:28:21 - INFO - __main__ -     acc = 0.9194
11/08/2021 18:28:21 - INFO - __main__ -     loss = 0.6359
11/08/2021 18:28:21 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9193825042881647


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 18:35:41 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:35:41 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:35:41 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:35:41 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 18:36:03 - INFO - __main__ -   ***** Eval results *****
11/08/2021 18:36:03 - INFO - __main__ -     acc = 0.9185
11/08/2021 18:36:03 - INFO - __main__ -     loss = 0.6645
11/08/2021 18:36:03 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:36:05 - INFO - __main__ -   Saving model checkpoint to ./rbt_model1
11/08/2021 18:36:05 - INFO - __main__ -   ***** Running evaluation on test dataset *****
11/08/2021 18:36:05 - INFO - __main__ -     Batch size = 16


Save last model


Predicting:   0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/7748 [00:00<?, ?it/s]

228


  0%|          | 0/1166 [00:00<?, ?it/s]

200


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 18:36:42 - INFO - __main__ -     global steps = 0
11/08/2021 18:44:03 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:44:03 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:44:03 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:44:03 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 18:44:27 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_2
11/08/2021 18:44:27 - INFO - __main__ -   ***** Eval results *****
11/08/2021 18:44:27 - INFO - __main__ -     acc = 0.8593
11/08/2021 18:44:27 - INFO - __main__ -     loss = 0.3523
11/08/2021 18:44:27 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8593481989708405


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 18:51:49 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:51:49 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:51:49 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:51:49 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 18:52:14 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_2
11/08/2021 18:52:14 - INFO - __main__ -   ***** Eval results *****
11/08/2021 18:52:14 - INFO - __main__ -     acc = 0.8636
11/08/2021 18:52:14 - INFO - __main__ -     loss = 0.3393
11/08/2021 18:52:14 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8636363636363636


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 18:59:36 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 18:59:36 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 18:59:36 - INFO - __main__ -     Num examples = 1166
11/08/2021 18:59:36 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:00:01 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_2
11/08/2021 19:00:01 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:00:01 - INFO - __main__ -     acc = 0.8911
11/08/2021 19:00:01 - INFO - __main__ -     loss = 0.3768
11/08/2021 19:00:01 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8910806174957119


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:07:22 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:07:22 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 19:07:22 - INFO - __main__ -     Num examples = 1166
11/08/2021 19:07:22 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:07:44 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:07:44 - INFO - __main__ -     acc = 0.8902
11/08/2021 19:07:44 - INFO - __main__ -     loss = 0.4245
11/08/2021 19:07:44 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:15:05 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:15:05 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 19:15:05 - INFO - __main__ -     Num examples = 1166
11/08/2021 19:15:05 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:15:29 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_2
11/08/2021 19:15:29 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:15:29 - INFO - __main__ -     acc = 0.9099
11/08/2021 19:15:29 - INFO - __main__ -     loss = 0.4743
11/08/2021 19:15:29 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9099485420240138


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:17:58 - INFO - __main__ -     global steps = 1292
11/08/2021 19:17:59 - INFO - __main__ -     global steps = 1292
11/08/2021 19:22:49 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:22:49 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 19:22:49 - INFO - __main__ -     Num examples = 1166
11/08/2021 19:22:49 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:23:11 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:23:11 - INFO - __main__ -     acc = 0.9031
11/08/2021 19:23:11 - INFO - __main__ -     loss = 0.6194
11/08/2021 19:23:11 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:30:32 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:30:32 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 19:30:32 - INFO - __main__ -     Num examples = 1166
11/08/2021 19:30:32 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:30:54 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:30:54 - INFO - __main__ -     acc = 0.9082
11/08/2021 19:30:54 - INFO - __main__ -     loss = 0.6188
11/08/2021 19:30:54 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:38:14 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:38:14 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 19:38:14 - INFO - __main__ -     Num examples = 1166
11/08/2021 19:38:14 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:38:36 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:38:36 - INFO - __main__ -     acc = 0.9099
11/08/2021 19:38:36 - INFO - __main__ -     loss = 0.6623
11/08/2021 19:38:36 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:45:57 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:45:57 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 19:45:57 - INFO - __main__ -     Num examples = 1166
11/08/2021 19:45:57 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:46:18 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:46:18 - INFO - __main__ -     acc = 0.9057
11/08/2021 19:46:18 - INFO - __main__ -     loss = 0.7166
11/08/2021 19:46:18 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:53:39 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:53:39 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 19:53:39 - INFO - __main__ -     Num examples = 1166
11/08/2021 19:53:39 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 19:54:01 - INFO - __main__ -   ***** Eval results *****
11/08/2021 19:54:01 - INFO - __main__ -     acc = 0.9099
11/08/2021 19:54:01 - INFO - __main__ -     loss = 0.7126
11/08/2021 19:54:01 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 19:54:03 - INFO - __main__ -   Saving model checkpoint to ./rbt_model2
11/08/2021 19:54:03 - INFO - __main__ -   ***** Running evaluation on test dataset *****
11/08/2021 19:54:03 - INFO - __main__ -     Batch size = 16


Save last model


Predicting:   0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/7748 [00:00<?, ?it/s]

228


  0%|          | 0/1166 [00:00<?, ?it/s]

200


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 19:54:40 - INFO - __main__ -     global steps = 0
11/08/2021 20:02:01 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:02:01 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:02:01 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:02:01 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:02:25 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_3
11/08/2021 20:02:25 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:02:25 - INFO - __main__ -     acc = 0.8654
11/08/2021 20:02:25 - INFO - __main__ -     loss = 0.3266
11/08/2021 20:02:25 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8653516295025729


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 20:09:46 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:09:46 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:09:46 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:09:46 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:10:10 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_3
11/08/2021 20:10:10 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:10:10 - INFO - __main__ -     acc = 0.8997
11/08/2021 20:10:10 - INFO - __main__ -     loss = 0.2590
11/08/2021 20:10:10 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8996569468267581


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 20:17:32 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:17:32 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:17:32 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:17:32 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:17:54 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:17:54 - INFO - __main__ -     acc = 0.8868
11/08/2021 20:17:54 - INFO - __main__ -     loss = 0.4748
11/08/2021 20:17:54 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 20:25:15 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:25:15 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:25:15 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:25:15 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:25:39 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_3
11/08/2021 20:25:39 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:25:39 - INFO - __main__ -     acc = 0.9005
11/08/2021 20:25:39 - INFO - __main__ -     loss = 0.5024
11/08/2021 20:25:39 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9005145797598628


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 20:33:01 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:33:01 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:33:01 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:33:01 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:33:26 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_3
11/08/2021 20:33:26 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:33:26 - INFO - __main__ -     acc = 0.9108
11/08/2021 20:33:26 - INFO - __main__ -     loss = 0.4603
11/08/2021 20:33:26 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9108061749571184


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 20:35:55 - INFO - __main__ -     global steps = 1292
11/08/2021 20:35:56 - INFO - __main__ -     global steps = 1292
11/08/2021 20:40:46 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:40:46 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:40:46 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:40:46 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:41:08 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:41:08 - INFO - __main__ -     acc = 0.8945
11/08/2021 20:41:08 - INFO - __main__ -     loss = 0.6545
11/08/2021 20:41:08 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 20:48:29 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:48:29 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:48:29 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:48:29 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:48:55 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_3
11/08/2021 20:48:55 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:48:55 - INFO - __main__ -     acc = 0.9125
11/08/2021 20:48:55 - INFO - __main__ -     loss = 0.5363
11/08/2021 20:48:55 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9125214408233276


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 20:56:15 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 20:56:15 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 20:56:15 - INFO - __main__ -     Num examples = 1166
11/08/2021 20:56:15 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 20:56:37 - INFO - __main__ -   ***** Eval results *****
11/08/2021 20:56:37 - INFO - __main__ -     acc = 0.9091
11/08/2021 20:56:37 - INFO - __main__ -     loss = 0.6153
11/08/2021 20:56:37 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:03:57 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:03:57 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:03:57 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:03:57 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:04:19 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:04:19 - INFO - __main__ -     acc = 0.9099
11/08/2021 21:04:19 - INFO - __main__ -     loss = 0.6729
11/08/2021 21:04:19 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:11:40 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:11:40 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:11:40 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:11:40 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:12:02 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:12:02 - INFO - __main__ -     acc = 0.9057
11/08/2021 21:12:02 - INFO - __main__ -     loss = 0.6884
11/08/2021 21:12:02 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:12:04 - INFO - __main__ -   Saving model checkpoint to ./rbt_model3
11/08/2021 21:12:04 - INFO - __main__ -   ***** Running evaluation on test dataset *****
11/08/2021 21:12:04 - INFO - __main__ -     Batch size = 16


Save last model


Predicting:   0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/7748 [00:00<?, ?it/s]

228


  0%|          | 0/1166 [00:00<?, ?it/s]

200


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:12:41 - INFO - __main__ -     global steps = 0
11/08/2021 21:20:01 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:20:01 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:20:01 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:20:01 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:20:25 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_4
11/08/2021 21:20:25 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:20:25 - INFO - __main__ -     acc = 0.8568
11/08/2021 21:20:25 - INFO - __main__ -     loss = 0.3392
11/08/2021 21:20:25 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8567753001715266


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:27:47 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:27:47 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:27:47 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:27:47 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:28:09 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:28:09 - INFO - __main__ -     acc = 0.8345
11/08/2021 21:28:09 - INFO - __main__ -     loss = 0.4212
11/08/2021 21:28:09 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:35:30 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:35:30 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:35:30 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:35:30 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:35:56 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_4
11/08/2021 21:35:56 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:35:56 - INFO - __main__ -     acc = 0.8988
11/08/2021 21:35:56 - INFO - __main__ -     loss = 0.3255
11/08/2021 21:35:56 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.8987993138936535


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:43:17 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:43:17 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:43:17 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:43:17 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:43:39 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:43:39 - INFO - __main__ -     acc = 0.8868
11/08/2021 21:43:39 - INFO - __main__ -     loss = 0.4595
11/08/2021 21:43:39 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:51:00 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:51:00 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:51:00 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:51:00 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:51:22 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:51:22 - INFO - __main__ -     acc = 0.8945
11/08/2021 21:51:22 - INFO - __main__ -     loss = 0.5487
11/08/2021 21:51:22 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 21:53:51 - INFO - __main__ -     global steps = 1292
11/08/2021 21:53:52 - INFO - __main__ -     global steps = 1292
11/08/2021 21:58:43 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 21:58:43 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 21:58:43 - INFO - __main__ -     Num examples = 1166
11/08/2021 21:58:43 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 21:59:05 - INFO - __main__ -   ***** Eval results *****
11/08/2021 21:59:05 - INFO - __main__ -     acc = 0.8979
11/08/2021 21:59:05 - INFO - __main__ -     loss = 0.5816
11/08/2021 21:59:05 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 22:06:26 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 22:06:26 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 22:06:26 - INFO - __main__ -     Num examples = 1166
11/08/2021 22:06:26 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 22:06:51 - INFO - __main__ -   Saving model checkpoint to ./final_klue_roberta_model_4
11/08/2021 22:06:51 - INFO - __main__ -   ***** Eval results *****
11/08/2021 22:06:51 - INFO - __main__ -     acc = 0.9005
11/08/2021 22:06:51 - INFO - __main__ -     loss = 0.6800
11/08/2021 22:06:51 - INFO - __main__ -   ---------------------------------------------------


save new best model acc :  0.9005145797598628


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 22:14:11 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 22:14:11 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 22:14:11 - INFO - __main__ -     Num examples = 1166
11/08/2021 22:14:11 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 22:14:33 - INFO - __main__ -   ***** Eval results *****
11/08/2021 22:14:33 - INFO - __main__ -     acc = 0.8911
11/08/2021 22:14:33 - INFO - __main__ -     loss = 0.7817
11/08/2021 22:14:33 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 22:21:54 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 22:21:54 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 22:21:54 - INFO - __main__ -     Num examples = 1166
11/08/2021 22:21:54 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 22:22:16 - INFO - __main__ -   ***** Eval results *****
11/08/2021 22:22:16 - INFO - __main__ -     acc = 0.8971
11/08/2021 22:22:16 - INFO - __main__ -     loss = 0.7606
11/08/2021 22:22:16 - INFO - __main__ -   ---------------------------------------------------


Iteration:   0%|          | 0/485 [00:00<?, ?it/s]

11/08/2021 22:29:36 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 22:29:36 - INFO - __main__ -   ***** Running evaluation on dev dataset *****
11/08/2021 22:29:36 - INFO - __main__ -     Num examples = 1166
11/08/2021 22:29:36 - INFO - __main__ -     Batch size = 16


Evaluating:   0%|          | 0/73 [00:00<?, ?it/s]

11/08/2021 22:29:58 - INFO - __main__ -   ***** Eval results *****
11/08/2021 22:29:58 - INFO - __main__ -     acc = 0.8979
11/08/2021 22:29:58 - INFO - __main__ -     loss = 0.7461
11/08/2021 22:29:58 - INFO - __main__ -   ---------------------------------------------------
11/08/2021 22:30:00 - INFO - __main__ -   Saving model checkpoint to ./rbt_model4
11/08/2021 22:30:00 - INFO - __main__ -   ***** Running evaluation on test dataset *****
11/08/2021 22:30:00 - INFO - __main__ -     Batch size = 16


Save last model


Predicting:   0%|          | 0/78 [00:00<?, ?it/s]