<a href="https://colab.research.google.com/github/EnaJeong/study-KoELECTRA_fine_tuning/blob/main/KoELECTRA_seq_cls_KorSTS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# KorSTS

주어진 두 문장의 유사도를 0부터 5사이의 값으로 측정

데이터: [KorNLUDatasets](https://github.com/kakaobrain/KorNLUDatasets)



In [1]:
!pip install attrdict
!pip install transformers

Collecting attrdict
  Downloading https://files.pythonhosted.org/packages/ef/97/28fe7e68bc7adfce67d4339756e85e9fcf3c6fd7f0c0781695352b70472c/attrdict-2.0.1-py2.py3-none-any.whl
Installing collected packages: attrdict
Successfully installed attrdict-2.0.1
Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/81/91/61d69d58a1af1bd81d9ca9d62c90a6de3ab80d77f27c5df65d9a2c1f5626/transformers-4.5.0-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.2MB 16.7MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 38.4MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/08/cd/342e584ee544d044fb573ae697404ce22ede086c9e87ce5960772084cad0/sacremoses-0.0.44.tar.gz (862kB)
[K     |██████████

In [2]:
import argparse
import json
import logging
import os
import glob

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from fastprogress.fastprogress import master_bar, progress_bar

In [3]:
from attrdict import AttrDict

In [4]:
from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import ElectraConfig, ElectraTokenizer, ElectraForSequenceClassification

## Utils

In [5]:
from scipy.stats import pearsonr, spearmanr

In [6]:
def init_logger():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

평가 지표 계산 : pearson, spearman 상관계수

In [7]:
def compute_metrics(labels, preds):
    assert len(preds) == len(labels)

    pearson_corr = pearsonr(preds, labels)[0]
    spearman_corr = spearmanr(preds, labels)[0]
    
    return {
        "pearson": pearson_corr,
        "spearmanr": spearman_corr,
        "corr": (pearson_corr + spearman_corr) / 2,
    }

## 데이터 전처리

In [8]:
import copy
from torch.utils.data import TensorDataset

### 데이터 구조

```
genre	filename	year	id	score	sentence1	sentence2
main-captions	MSRvid	2012test	0001	5.000	비행기가 이륙하고 있다.	비행기가 이륙하고 있다.
```

### 코드

데이터에서 score, sentence1, sentence2 추출

In [9]:
class KorSTSProcessor(object):
    """Processor for the KorSTS data set """

    def __init__(self, args):
        self.args = args

    def get_labels(self):
        return [None]

    @classmethod
    def _read_file(cls, input_file):
        """Reads a tab separated value file."""
        with open(input_file, "r", encoding="utf-8") as f:
            lines = []
            for line in f:
                lines.append(line.strip())
            return lines

    def _create_examples(self, lines, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, line) in enumerate(lines[1:]):
            line = line.split("\t")
            guid = f"{set_type}-{i}"
            text_a = line[5]
            text_b = line[6]
            label = line[4]   # score

            examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
            
            if i % 1000 == 0:
                logger.info(line)
            
        return examples

    def get_examples(self, mode):
        """
        Args:
            mode: train, dev, test
        """
        file_to_read = None
        if mode == "train":
            file_to_read = self.args.train_file  # Only mnli for training
        elif mode == "dev":
            file_to_read = self.args.dev_file
        elif mode == "test":
            file_to_read = self.args.test_file

        logger.info("LOOKING AT {}".format(os.path.join(self.args.data_dir, self.args.task, file_to_read)))
        
        return self._create_examples(
            self._read_file(os.path.join(self.args.data_dir, self.args.task, file_to_read)), mode
        )

class InputExample(object):
    """
    A single training/test example for simple sequence classification.
    """

    def __init__(self, guid, text_a, text_b, label):
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.label = label

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"

정답 label 생성 & 입력값 embedding

In [10]:
class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, attention_mask, token_type_ids, label):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.label = label

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"

# `task`와 `processor`는 classification에서 label list 생성 및 로그 출력에 사용
def seq_cls_convert_examples_to_features(args, examples, tokenizer, max_length, task=None, processor=None):
    
    # 정답 label 생성
    labels = None

    if OUTPUT_MODE == "classification":
        label_list = processor.get_labels()
        logger.info("Using label list {} for task {}".format(label_list, task))

        label_map = {label: i for i, label in enumerate(label_list)}
        labels = [label_map[example.label] for example in examples]

    elif OUTPUT_MODE == "regression":
        labels = [float(example.label) for example in examples]

    raise KeyError(OUTPUT_MODE)

    # feature encoding
    batch_encoding = tokenizer.batch_encode_plus(
        [(example.text_a, example.text_b) for example in examples],
        max_length=max_length,
        padding="max_length",
        add_special_tokens=True,
        truncation=True,
    )

    features = []
    for i in range(len(examples)):
        inputs = {k: batch_encoding[k][i] for k in batch_encoding}
        feature = InputFeatures(**inputs, label=labels[i])
        features.append(feature)

    # log
    for i, example in enumerate(examples[:5]):
        logger.info("*** Example ***")
        logger.info("guid: {}".format(example.guid))
        logger.info("input_ids: {}".format(" ".join([str(x) for x in features[i].input_ids])))
        logger.info("attention_mask: {}".format(" ".join([str(x) for x in features[i].attention_mask])))
        logger.info("token_type_ids: {}".format(" ".join([str(x) for x in features[i].token_type_ids])))
        logger.info("label: {}".format(features[i].label))

    return features

## 훈련

데이터 파일 로드
1. 최초 호출 시 전처리 후 cach 파일로 저장
2. cash 파일이 있는 경우 저장된 파일 로드

Tensor로 변경한 후 dataset 생성

In [11]:
def load_and_cache_examples(args, tokenizer, mode):
    processor = PROCESSOR(args)

    # Load data features from cache or dataset file
    cached_features_file = os.path.join(
        args.data_dir,
        "cached_{}_{}_{}_{}".format(
            str(args.task), list(filter(None, args.model_name_or_path.split("/"))).pop(), str(args.max_seq_len), mode
        ),
    )
    
    if os.path.exists(cached_features_file):
        logger.info("Loading features from cached file %s", cached_features_file)
        features = torch.load(cached_features_file)
    else:
        logger.info("Creating features from dataset file at %s", args.data_dir)
        if mode in ("train", "dev", "test"):
            examples = processor.get_examples(mode)
        else:
            raise ValueError("For mode, only train, dev, test is avaiable")

        features = seq_cls_convert_examples_to_features(
            args, examples, tokenizer, max_length=args.max_seq_len, task=args.task, processor=processor
        )
        logger.info("Saving features into cached file %s", cached_features_file)
        torch.save(features, cached_features_file)

    # Convert to Tensors and build dataset
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)

    if OUTPUT_MODE == "classification":
        all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
    elif OUTPUT_MODE == "regression":
        all_labels = torch.tensor([f.label for f in features], dtype=torch.float)

    dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_labels)
    return dataset

학습

In [12]:
def train(args, model, train_dataset, dev_dataset=None, test_dataset=None):
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else: 
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
    
    # optimizer, schedule 준비
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 
         'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=int(t_total * args.warmup_proportion), num_training_steps=t_total)

    if os.path.isfile(os.path.join(args.model_name_or_path, "optimizer.pt")) \
        and os.path.isfile(os.path.join(args.model_name_or_path, "scheduler.pt")):
        # optimizer, scheduler states 로드
        optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))
    
    # log
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Total train batch size = %d", args.train_batch_size)
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)
    logger.info("  Logging steps = %d", args.logging_steps)
    logger.info("  Save steps = %d", args.save_steps)

    # 변수 초기화
    global_step = 0
    tr_loss = 0.0

    model.zero_grad()
    mb = master_bar(range(int(args.num_train_epochs)))

    # epoch 수행
    for epoch in mb:
        epoch_iterator = progress_bar(train_dataloader, parent=mb)

        for step, batch in enumerate(epoch_iterator):
            model.train()   # Sets the module in training mode.
            batch = tuple(t.to(args.device) for t in batch)

            # transformer 4. 버전에서는 ouput의 기본 형태가 dict이다.
            inputs = {
                "input_ids" : batch[0],
                "attention_mask" : batch[1],
                "token_type_ids" : batch[2],
                "labels" : batch[3]
            }
            outputs = model(**inputs)
            loss = outputs['loss']

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            
            # loss 역전파
            loss.backward()
            tr_loss += loss.item()

            # 파라미터 업데이트
            if (step + 1) % args.gradient_accumulation_steps == 0 or (
                len(train_dataloader) <= args.gradient_accumulation_steps
                and (step + 1) == len(train_dataloader)
            ):
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step +=1

                # 평가
                if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    if args.evaluate_test_during_training:
                        evaluate(args, model, test_dataset, "test", global_step)
                    else:
                        evaluate(args, model, test_dataset, "dev", global_step)
                
                # 모델 및 optimizer의 현재 상태 저장
                if args.save_steps > 0 and global_step % args.save_steps == 0:
                    
                    output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)

                    model_to_save = (model.module if hasattr(model, "module") else model)
                    model_to_save.save_pretrained(output_dir)

                    torch.save(args, os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to {}".format(output_dir))

                    if args.save_optimizer:
                        torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                        torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                        logger.info("Saving optimizer and scheduler states to {}".format(output_dir))

            if args.max_steps > 0 and global_step > args.max_steps:
                break

        mb.write("Epoch {} done".format(epoch + 1))

        if args.max_steps > 0 and global_step > args.max_steps:
            break

    return global_step, tr_loss / global_step

평가

In [14]:
def evaluate(args, model, eval_dataset, mode, global_step=None):
    results = {}
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # 로그
    if global_step != None:
        logger.info(f"***** Running evaluation on {mode} dataset ({global_step} step) *****")
    else:
        logger.info(f"***** Running evaluation on {mode} dataset *****")
    logger.info(f"  Num examples = {len(eval_dataset)}")
    logger.info(f"  Eval Batch size = {args.eval_batch_size}")

    # 변수 초기화
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    # 예측값 생성
    for batch in progress_bar(eval_dataloader):
        model.eval()    # Sets the module in evaluation mode.
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            # transformer 4. 버전에서는 ouput의 기본 형태가 dict이다.
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[3]
            }
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs['loss'], outputs['logits']

            eval_loss += tmp_eval_loss.mean().item()

        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps

    if OUTPUT_MODE == "classification":
        preds = np.argmax(preds, axis=1)
    elif OUTPUT_MODE == "regression":
        preds = np.squeeze(preds)

    # 평가 지표 계산
    result = compute_metrics(out_label_ids, preds)
    results.update(result)

    # 평가 결과 저장
    output_dir = os.path.join(args.output_dir, mode)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    output_eval_file = os.path.join(output_dir, f"{mode}-{global_step}.txt" if global_step else f"{model}.txt")
    with open(output_eval_file, "w") as f_w:
        logger.info(f"***** Eval results on {mode} dataset *****")
        for key in sorted(results.keys()):
            logger.info(f"  {key} = {str(results[key])}")
            f_w.write("  {} = {}\n".format(key, str(results[key])))

    return results

In [15]:
def main(config_file):
    # Read from config file and make args
    with open(config_file) as f:
        args = AttrDict(json.load(f))
    logger.info(f"Training/evaluation parameters {args}")

    args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

    init_logger()

    # Config, Tokenizer, model (ElectraForSequenceClassification) 사전 훈련 모델 읽어오기
    if OUTPUT_MODE == "regression":
        config = CONFIG_CLASS.from_pretrained(
            args.model_name_or_path,
            num_labels=1    # regression은 label 하나
        )
    else:
        processor = PROCESSOR(args)
        labels = processor.get_labels()

        config = CONFIG_CLASS.from_pretrained(
            args.model_name_or_path,
            num_labels=TASK_NUM_LABELS,
            id2label={str(i): label for i, label in enumerate(labels)},
            label2id={label: i for i, label in enumerate(labels)},
        )

    tokenizer = TOKENIZER_CLASS.from_pretrained(
        args.model_name_or_path,
        do_lower_case=args.do_lower_case
    )

    model = MODEL_FOR_SEQUENCE_CLASSIFICATION.from_pretrained(
        args.model_name_or_path,
        config=config
    )

    # GPU or CPU
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    model.to(args.device)

    # Load dataset
    train_dataset = load_and_cache_examples(args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = load_and_cache_examples(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = load_and_cache_examples(args, tokenizer, mode="test") if args.test_file else None

    if dev_dataset == None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use testset

    if args.do_train:
        global_step, tr_loss = train(args, model, train_dataset, dev_dataset, test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(global_step, tr_loss))

    # 최종 평가 결과 생성
    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + "pytorch_model.bin", recursive=True))
        )
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging

        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            model = MODEL_FOR_SEQUENCE_CLASSIFICATION.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, test_dataset, mode="test", global_step=global_step)
            result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))

## 실행

In [17]:
CONFIG_FILE = './KorSTS_koelectra-base-v3.json'

PROCESSOR = KorSTSProcessor
OUTPUT_MODE = "regression"
# TASK_NUM_LABELS = 1
CONFIG_CLASS = ElectraConfig
TOKENIZER_CLASS = ElectraTokenizer
MODEL_FOR_SEQUENCE_CLASSIFICATION = ElectraForSequenceClassification

logger = logging.getLogger()

main(CONFIG_FILE)

04/07/2021 03:56:03 - INFO - __main__ -   Training/evaluation parameters AttrDict({'task': 'korsts', 'data_dir': '/content/drive/MyDrive/datasets', 'ckpt_dir': '/content/drive/MyDrive/KoElectra/ckpt', 'train_file': 'sts-train.tsv', 'dev_file': 'sts-dev.tsv', 'test_file': 'sts-test.tsv', 'evaluate_test_during_training': False, 'eval_all_checkpoints': True, 'save_optimizer': False, 'do_lower_case': False, 'do_train': True, 'do_eval': True, 'max_seq_len': 128, 'num_train_epochs': 10, 'weight_decay': 0.0, 'gradient_accumulation_steps': 1, 'adam_epsilon': 1e-08, 'warmup_proportion': 0, 'max_steps': -1, 'max_grad_norm': 1.0, 'no_cuda': False, 'model_type': 'koelectra-base-v3', 'model_name_or_path': 'monologg/koelectra-base-v3-discriminator', 'output_dir': '/content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt', 'seed': 42, 'train_batch_size': 32, 'eval_batch_size': 64, 'logging_steps': 100, 'save_steps': 100, 'learning_rate': 5e-05})
Some weights of the model checkpoint at monologg/

04/07/2021 03:57:33 - INFO - __main__ -   ***** Running evaluation on dev dataset (100 step) *****
04/07/2021 03:57:33 - INFO - __main__ -     Num examples = 1379
04/07/2021 03:57:33 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 03:57:44 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 03:57:44 - INFO - __main__ -     corr = 0.8333034058187612
04/07/2021 03:57:44 - INFO - __main__ -     pearson = 0.8334452679903397
04/07/2021 03:57:44 - INFO - __main__ -     spearmanr = 0.8331615436471826
04/07/2021 03:57:46 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-100
04/07/2021 03:58:57 - INFO - __main__ -   ***** Running evaluation on dev dataset (200 step) *****
04/07/2021 03:58:57 - INFO - __main__ -     Num examples = 1379
04/07/2021 03:58:57 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 03:59:08 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 03:59:08 - INFO - __main__ -     corr = 0.8427940793836847
04/07/2021 03:59:08 - INFO - __main__ -     pearson = 0.8444352480585545
04/07/2021 03:59:08 - INFO - __main__ -     spearmanr = 0.8411529107088148
04/07/2021 03:59:10 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-200
04/07/2021 04:00:22 - INFO - __main__ -   ***** Running evaluation on dev dataset (300 step) *****
04/07/2021 04:00:22 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:00:22 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:00:33 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:00:33 - INFO - __main__ -     corr = 0.8396509455031782
04/07/2021 04:00:33 - INFO - __main__ -     pearson = 0.8391420650099211
04/07/2021 04:00:33 - INFO - __main__ -     spearmanr = 0.8401598259964352
04/07/2021 04:00:35 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-300
04/07/2021 04:01:46 - INFO - __main__ -   ***** Running evaluation on dev dataset (400 step) *****
04/07/2021 04:01:46 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:01:46 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:01:57 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:01:57 - INFO - __main__ -     corr = 0.8601597231957789
04/07/2021 04:01:57 - INFO - __main__ -     pearson = 0.8618601207912893
04/07/2021 04:01:57 - INFO - __main__ -     spearmanr = 0.8584593256002685
04/07/2021 04:01:59 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-400
04/07/2021 04:03:11 - INFO - __main__ -   ***** Running evaluation on dev dataset (500 step) *****
04/07/2021 04:03:11 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:03:11 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:03:22 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:03:22 - INFO - __main__ -     corr = 0.8468642157333137
04/07/2021 04:03:22 - INFO - __main__ -     pearson = 0.8486116964976741
04/07/2021 04:03:22 - INFO - __main__ -     spearmanr = 0.8451167349689535
04/07/2021 04:03:23 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-500
04/07/2021 04:04:35 - INFO - __main__ -   ***** Running evaluation on dev dataset (600 step) *****
04/07/2021 04:04:35 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:04:35 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:04:46 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:04:46 - INFO - __main__ -     corr = 0.8586976088097559
04/07/2021 04:04:46 - INFO - __main__ -     pearson = 0.8598354284934916
04/07/2021 04:04:46 - INFO - __main__ -     spearmanr = 0.8575597891260202
04/07/2021 04:04:48 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-600
04/07/2021 04:05:59 - INFO - __main__ -   ***** Running evaluation on dev dataset (700 step) *****
04/07/2021 04:05:59 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:05:59 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:06:10 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:06:10 - INFO - __main__ -     corr = 0.8588770430883147
04/07/2021 04:06:10 - INFO - __main__ -     pearson = 0.8610388678072917
04/07/2021 04:06:10 - INFO - __main__ -     spearmanr = 0.8567152183693377
04/07/2021 04:06:12 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-700
04/07/2021 04:07:23 - INFO - __main__ -   ***** Running evaluation on dev dataset (800 step) *****
04/07/2021 04:07:23 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:07:23 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:07:35 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:07:35 - INFO - __main__ -     corr = 0.8572399949407405
04/07/2021 04:07:35 - INFO - __main__ -     pearson = 0.8604208973258877
04/07/2021 04:07:35 - INFO - __main__ -     spearmanr = 0.8540590925555934
04/07/2021 04:07:36 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-800
04/07/2021 04:08:48 - INFO - __main__ -   ***** Running evaluation on dev dataset (900 step) *****
04/07/2021 04:08:48 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:08:48 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:08:59 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:08:59 - INFO - __main__ -     corr = 0.8629370711891939
04/07/2021 04:08:59 - INFO - __main__ -     pearson = 0.8641173066693771
04/07/2021 04:08:59 - INFO - __main__ -     spearmanr = 0.8617568357090108
04/07/2021 04:09:01 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-900
04/07/2021 04:10:12 - INFO - __main__ -   ***** Running evaluation on dev dataset (1000 step) *****
04/07/2021 04:10:12 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:10:12 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:10:23 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:10:23 - INFO - __main__ -     corr = 0.8592801133000374
04/07/2021 04:10:23 - INFO - __main__ -     pearson = 0.8601060917610485
04/07/2021 04:10:23 - INFO - __main__ -     spearmanr = 0.8584541348390263
04/07/2021 04:10:25 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1000
04/07/2021 04:11:36 - INFO - __main__ -   ***** Running evaluation on dev dataset (1100 step) *****
04/07/2021 04:11:36 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:11:36 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:11:47 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:11:47 - INFO - __main__ -     corr = 0.8607580866173739
04/07/2021 04:11:47 - INFO - __main__ -     pearson = 0.8628642923832649
04/07/2021 04:11:47 - INFO - __main__ -     spearmanr = 0.8586518808514828
04/07/2021 04:11:50 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1100
04/07/2021 04:13:01 - INFO - __main__ -   ***** Running evaluation on dev dataset (1200 step) *****
04/07/2021 04:13:01 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:13:01 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:13:12 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:13:12 - INFO - __main__ -     corr = 0.8584098725393596
04/07/2021 04:13:12 - INFO - __main__ -     pearson = 0.8596635548196404
04/07/2021 04:13:12 - INFO - __main__ -     spearmanr = 0.8571561902590789
04/07/2021 04:13:14 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1200
04/07/2021 04:14:26 - INFO - __main__ -   ***** Running evaluation on dev dataset (1300 step) *****
04/07/2021 04:14:26 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:14:26 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:14:37 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:14:37 - INFO - __main__ -     corr = 0.8600639867985419
04/07/2021 04:14:37 - INFO - __main__ -     pearson = 0.8612436790453076
04/07/2021 04:14:37 - INFO - __main__ -     spearmanr = 0.8588842945517762
04/07/2021 04:14:39 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1300
04/07/2021 04:15:50 - INFO - __main__ -   ***** Running evaluation on dev dataset (1400 step) *****
04/07/2021 04:15:50 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:15:50 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:16:01 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:16:01 - INFO - __main__ -     corr = 0.8612513532974548
04/07/2021 04:16:01 - INFO - __main__ -     pearson = 0.8627196406678505
04/07/2021 04:16:01 - INFO - __main__ -     spearmanr = 0.8597830659270593
04/07/2021 04:16:03 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1400
04/07/2021 04:17:15 - INFO - __main__ -   ***** Running evaluation on dev dataset (1500 step) *****
04/07/2021 04:17:15 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:17:15 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:17:26 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:17:26 - INFO - __main__ -     corr = 0.8617517784147037
04/07/2021 04:17:26 - INFO - __main__ -     pearson = 0.8630189743420115
04/07/2021 04:17:26 - INFO - __main__ -     spearmanr = 0.8604845824873961
04/07/2021 04:17:28 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1500
04/07/2021 04:18:39 - INFO - __main__ -   ***** Running evaluation on dev dataset (1600 step) *****
04/07/2021 04:18:39 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:18:39 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:18:50 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:18:50 - INFO - __main__ -     corr = 0.8612508261920853
04/07/2021 04:18:50 - INFO - __main__ -     pearson = 0.862775588620725
04/07/2021 04:18:50 - INFO - __main__ -     spearmanr = 0.8597260637634455
04/07/2021 04:18:52 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1600
04/07/2021 04:20:03 - INFO - __main__ -   ***** Running evaluation on dev dataset (1700 step) *****
04/07/2021 04:20:03 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:20:03 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:20:14 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:20:14 - INFO - __main__ -     corr = 0.8614555312610136
04/07/2021 04:20:14 - INFO - __main__ -     pearson = 0.8626256953801005
04/07/2021 04:20:14 - INFO - __main__ -     spearmanr = 0.8602853671419267
04/07/2021 04:20:16 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1700
04/07/2021 04:21:28 - INFO - __main__ -   ***** Running evaluation on dev dataset (1800 step) *****
04/07/2021 04:21:28 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:21:28 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:21:39 - INFO - __main__ -   ***** Eval results on dev dataset *****
04/07/2021 04:21:39 - INFO - __main__ -     corr = 0.8610732726227514
04/07/2021 04:21:39 - INFO - __main__ -     pearson = 0.862520716318629
04/07/2021 04:21:39 - INFO - __main__ -     spearmanr = 0.8596258289268737
04/07/2021 04:21:40 - INFO - __main__ -   Saving model checkpoint to /content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1800
04/07/2021 04:21:41 - INFO - __main__ -    global_step = 1800, average loss = 0.2571479117849635
04/07/2021 04:21:41 - INFO - __main__ -   Evaluate the following checkpoints: ['/content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-100', '/content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1000', '/content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1100', '/content/drive/MyDrive/KoElectra/koelectra-base-v3-korsts-ckpt/checkpoint-1200', '/content/drive/MyDrive/KoElectra/koelectra-base-v

04/07/2021 04:22:01 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:22:01 - INFO - __main__ -     corr = 0.8333034058187612
04/07/2021 04:22:01 - INFO - __main__ -     pearson = 0.8334452679903397
04/07/2021 04:22:01 - INFO - __main__ -     spearmanr = 0.8331615436471826
04/07/2021 04:22:17 - INFO - __main__ -   ***** Running evaluation on test dataset (1000 step) *****
04/07/2021 04:22:17 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:22:17 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:22:28 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:22:28 - INFO - __main__ -     corr = 0.8592801133000374
04/07/2021 04:22:28 - INFO - __main__ -     pearson = 0.8601060917610485
04/07/2021 04:22:28 - INFO - __main__ -     spearmanr = 0.8584541348390263
04/07/2021 04:22:43 - INFO - __main__ -   ***** Running evaluation on test dataset (1100 step) *****
04/07/2021 04:22:43 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:22:43 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:22:54 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:22:54 - INFO - __main__ -     corr = 0.8607580866173739
04/07/2021 04:22:54 - INFO - __main__ -     pearson = 0.8628642923832649
04/07/2021 04:22:54 - INFO - __main__ -     spearmanr = 0.8586518808514828
04/07/2021 04:23:09 - INFO - __main__ -   ***** Running evaluation on test dataset (1200 step) *****
04/07/2021 04:23:09 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:23:09 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:23:20 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:23:20 - INFO - __main__ -     corr = 0.8584098725393596
04/07/2021 04:23:20 - INFO - __main__ -     pearson = 0.8596635548196404
04/07/2021 04:23:20 - INFO - __main__ -     spearmanr = 0.8571561902590789
04/07/2021 04:23:35 - INFO - __main__ -   ***** Running evaluation on test dataset (1300 step) *****
04/07/2021 04:23:35 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:23:35 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:23:45 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:23:45 - INFO - __main__ -     corr = 0.8600639867985419
04/07/2021 04:23:45 - INFO - __main__ -     pearson = 0.8612436790453076
04/07/2021 04:23:45 - INFO - __main__ -     spearmanr = 0.8588842945517762
04/07/2021 04:24:01 - INFO - __main__ -   ***** Running evaluation on test dataset (1400 step) *****
04/07/2021 04:24:01 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:24:01 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:24:11 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:24:11 - INFO - __main__ -     corr = 0.8612513532974548
04/07/2021 04:24:11 - INFO - __main__ -     pearson = 0.8627196406678505
04/07/2021 04:24:11 - INFO - __main__ -     spearmanr = 0.8597830659270593
04/07/2021 04:24:27 - INFO - __main__ -   ***** Running evaluation on test dataset (1500 step) *****
04/07/2021 04:24:27 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:24:27 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:24:37 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:24:37 - INFO - __main__ -     corr = 0.8617517784147037
04/07/2021 04:24:37 - INFO - __main__ -     pearson = 0.8630189743420115
04/07/2021 04:24:37 - INFO - __main__ -     spearmanr = 0.8604845824873961
04/07/2021 04:24:52 - INFO - __main__ -   ***** Running evaluation on test dataset (1600 step) *****
04/07/2021 04:24:52 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:24:52 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:25:02 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:25:02 - INFO - __main__ -     corr = 0.8612508261920853
04/07/2021 04:25:02 - INFO - __main__ -     pearson = 0.862775588620725
04/07/2021 04:25:02 - INFO - __main__ -     spearmanr = 0.8597260637634455
04/07/2021 04:25:18 - INFO - __main__ -   ***** Running evaluation on test dataset (1700 step) *****
04/07/2021 04:25:18 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:25:18 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:25:28 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:25:28 - INFO - __main__ -     corr = 0.8614555312610136
04/07/2021 04:25:28 - INFO - __main__ -     pearson = 0.8626256953801005
04/07/2021 04:25:28 - INFO - __main__ -     spearmanr = 0.8602853671419267
04/07/2021 04:25:43 - INFO - __main__ -   ***** Running evaluation on test dataset (1800 step) *****
04/07/2021 04:25:43 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:25:43 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:25:53 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:25:53 - INFO - __main__ -     corr = 0.8610732726227514
04/07/2021 04:25:53 - INFO - __main__ -     pearson = 0.862520716318629
04/07/2021 04:25:53 - INFO - __main__ -     spearmanr = 0.8596258289268737
04/07/2021 04:26:02 - INFO - __main__ -   ***** Running evaluation on test dataset (200 step) *****
04/07/2021 04:26:02 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:26:02 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:26:12 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:26:12 - INFO - __main__ -     corr = 0.8427940793836847
04/07/2021 04:26:12 - INFO - __main__ -     pearson = 0.8444352480585545
04/07/2021 04:26:12 - INFO - __main__ -     spearmanr = 0.8411529107088148
04/07/2021 04:26:21 - INFO - __main__ -   ***** Running evaluation on test dataset (300 step) *****
04/07/2021 04:26:21 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:26:21 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:26:32 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:26:32 - INFO - __main__ -     corr = 0.8396509455031782
04/07/2021 04:26:32 - INFO - __main__ -     pearson = 0.8391420650099211
04/07/2021 04:26:32 - INFO - __main__ -     spearmanr = 0.8401598259964352
04/07/2021 04:26:42 - INFO - __main__ -   ***** Running evaluation on test dataset (400 step) *****
04/07/2021 04:26:42 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:26:42 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:26:52 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:26:52 - INFO - __main__ -     corr = 0.8601597231957789
04/07/2021 04:26:52 - INFO - __main__ -     pearson = 0.8618601207912893
04/07/2021 04:26:52 - INFO - __main__ -     spearmanr = 0.8584593256002685
04/07/2021 04:27:01 - INFO - __main__ -   ***** Running evaluation on test dataset (500 step) *****
04/07/2021 04:27:01 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:27:01 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:27:11 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:27:11 - INFO - __main__ -     corr = 0.8468642157333137
04/07/2021 04:27:11 - INFO - __main__ -     pearson = 0.8486116964976741
04/07/2021 04:27:11 - INFO - __main__ -     spearmanr = 0.8451167349689535
04/07/2021 04:27:20 - INFO - __main__ -   ***** Running evaluation on test dataset (600 step) *****
04/07/2021 04:27:20 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:27:20 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:27:30 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:27:30 - INFO - __main__ -     corr = 0.8586976088097559
04/07/2021 04:27:30 - INFO - __main__ -     pearson = 0.8598354284934916
04/07/2021 04:27:30 - INFO - __main__ -     spearmanr = 0.8575597891260202
04/07/2021 04:27:40 - INFO - __main__ -   ***** Running evaluation on test dataset (700 step) *****
04/07/2021 04:27:40 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:27:40 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:27:50 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:27:50 - INFO - __main__ -     corr = 0.8588770430883147
04/07/2021 04:27:50 - INFO - __main__ -     pearson = 0.8610388678072917
04/07/2021 04:27:50 - INFO - __main__ -     spearmanr = 0.8567152183693377
04/07/2021 04:27:59 - INFO - __main__ -   ***** Running evaluation on test dataset (800 step) *****
04/07/2021 04:27:59 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:27:59 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:28:09 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:28:09 - INFO - __main__ -     corr = 0.8572399949407405
04/07/2021 04:28:09 - INFO - __main__ -     pearson = 0.8604208973258877
04/07/2021 04:28:09 - INFO - __main__ -     spearmanr = 0.8540590925555934
04/07/2021 04:28:19 - INFO - __main__ -   ***** Running evaluation on test dataset (900 step) *****
04/07/2021 04:28:19 - INFO - __main__ -     Num examples = 1379
04/07/2021 04:28:19 - INFO - __main__ -     Eval Batch size = 64


04/07/2021 04:28:29 - INFO - __main__ -   ***** Eval results on test dataset *****
04/07/2021 04:28:29 - INFO - __main__ -     corr = 0.8629370711891939
04/07/2021 04:28:29 - INFO - __main__ -     pearson = 0.8641173066693771
04/07/2021 04:28:29 - INFO - __main__ -     spearmanr = 0.8617568357090108
