<a href="https://colab.research.google.com/github/Cheeyoung-Yoon/upstage_test/blob/main/upstage_model_selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pickle as pickle
import os
import pandas as pd
import torch


class RE_Dataset(torch.utils.data.Dataset):
  """ Dataset 구성을 위한 class."""
  def __init__(self, pair_dataset, labels):
    self.pair_dataset = pair_dataset
    self.labels = labels

  def __getitem__(self, idx):
    item = {key: val[idx].clone().detach() for key, val in self.pair_dataset.items()}
    item['labels'] = torch.tensor(self.labels[idx])
    return item

  def __len__(self):
    return len(self.labels)

def preprocessing_dataset(dataset):
  """ 처음 불러온 csv 파일을 원하는 형태의 DataFrame으로 변경 시켜줍니다."""
  subject_entity = []
  object_entity = []
  for i,j in zip(dataset['subject_entity'], dataset['object_entity']):
    i = i[1:-1].split(',')[0].split(':')[1]
    j = j[1:-1].split(',')[0].split(':')[1]

    subject_entity.append(i)
    object_entity.append(j)
  out_dataset = pd.DataFrame({'id':dataset['id'], 'sentence':dataset['sentence'],'subject_entity':subject_entity,'object_entity':object_entity,'label':dataset['label'],})
  return out_dataset

def load_data(dataset_dir):
  """ csv 파일을 경로에 맡게 불러 옵니다. """
  pd_dataset = pd.read_csv(dataset_dir)
  dataset = preprocessing_dataset(pd_dataset)

  return dataset


def tokenized_dataset(dataset, tokenizer, use_type_markers=True, use_unk=True, max_len=256):
    """
    dataset: pandas.DataFrame with columns:
      - sentence
      - subject_entity, object_entity  (dict-like str: {'word':..., 'type':...})
    """
    import ast

    def parse_ent(e):
        if isinstance(e, str):
            try:
                e = ast.literal_eval(e)
            except:
                return None, None
        if isinstance(e, dict):
            return e.get("word"), e.get("type")
        return None, None

    enc_inputs, enc_texts = [], []

    for s_ent, o_ent, sent in zip(dataset['subject_entity'], dataset['object_entity'], dataset['sentence']):
        s_word, s_type = parse_ent(s_ent)
        o_word, o_type = parse_ent(o_ent)

        # 단어가 누락된 경우 안전장치
        s_word = s_word if s_word else "<SUBJ>"
        o_word = o_word if o_word else "<OBJ>"

        if use_type_markers:
            if not s_type and use_unk: s_type = "UNK"
            if not o_type and use_unk: o_type = "UNK"

            if s_type and o_type:
                e_span = f"[E1-{s_type}]{s_word}[/E1] [E2-{o_type}]{o_word}[/E2]"
            else:
                # 타입을 전혀 모르면 타입 없는 일반 마커 사용
                e_span = f"[E1]{s_word}[/E1] [E2]{o_word}[/E2]"
        else:
            # 타입 마커 비활성화: 일반 마커만
            e_span = f"[E1]{s_word}[/E1] [E2]{o_word}[/E2]"

        enc_inputs.append(e_span)
        enc_texts.append(sent)

    # 필요 시 특수 토큰 등록 (한 번만 실행)
    # 타입 마커/일반 마커/종료 마커 + UNK
    special_tokens = {"additional_special_tokens": [
        "[E1]","[/E1]","[E2]","[/E2]",
        "[E1-PER]","[E2-PER]","[E1-ORG]","[E2-ORG]",
        "[E1-LOC]","[E2-LOC]","[E1-UNK]","[E2-UNK]"
    ]}
    num_added = tokenizer.add_special_tokens(special_tokens)
    # model.resize_token_embeddings(len(tokenizer))  # 모델 로드 후 1회 실행

    return tokenizer(
        enc_inputs,
        enc_texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_len,
        add_special_tokens=True,
    )



In [2]:
import os, pickle, numpy as np, pandas as pd, torch, sklearn
from dataclasses import dataclass
from typing import List, Optional, Dict, Any
from sklearn.metrics import accuracy_score
from transformers import (
    AutoTokenizer, AutoConfig, AutoModelForSequenceClassification,
    Trainer, TrainingArguments
)
from transformers import EarlyStoppingCallback
from sklearn.model_selection import train_test_split
from transformers import TrainerCallback

class PrintEvalF1Callback(TrainerCallback):
    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        if metrics and "eval_micro f1 score" in metrics:
            f1 = metrics["eval_micro f1 score"]
            print(f"[Step {state.global_step}] F1 = {f1:.3f}")
# ===== Metrics (model-agnostic, dynamic num_labels) =====
def micro_f1_wo_no_relation(preds, labels, label_list: List[str], no_rel: str = "no_relation"):
    no_rel_idx = label_list.index(no_rel)
    use_labels = list(range(len(label_list)))
    use_labels.remove(no_rel_idx)
    return sklearn.metrics.f1_score(labels, preds, average="micro", labels=use_labels) * 100.0

def auprc_all(probs, labels, num_labels: int):
    labels_oh = np.eye(num_labels)[labels]
    score = np.zeros((num_labels,), dtype=np.float32)
    for c in range(num_labels):
        targets_c = labels_oh[:, c]
        preds_c = probs[:, c]
        p, r, _ = sklearn.metrics.precision_recall_curve(targets_c, preds_c)
        score[c] = sklearn.metrics.auc(r, p)
    return float(np.mean(score) * 100.0)

def make_compute_metrics(label_list: List[str], no_rel: str = "no_relation"):
    num_labels = len(label_list)
    def _compute(eval_pred):
        logits = eval_pred.predictions
        probs  = logits if logits.ndim == 2 else logits[0]
        preds  = probs.argmax(-1)
        labels = eval_pred.label_ids
        return {
            "micro f1 score": micro_f1_wo_no_relation(preds, labels, label_list, no_rel),
            "auprc": auprc_all(probs, labels, num_labels),
            "accuracy": accuracy_score(labels, preds),
        }
    return _compute

# ===== Config =====
DEFAULT_LABEL_LIST = [
    'no_relation', 'org:top_members/employees', 'org:members', 'org:product', 'per:title',
    'org:alternate_names', 'per:employee_of', 'org:place_of_headquarters', 'per:product',
    'org:number_of_employees/members', 'per:children', 'per:place_of_residence',
    'per:alternate_names', 'per:other_family', 'per:colleagues', 'per:origin',
    'per:siblings', 'per:spouse', 'org:founded', 'org:political/religious_affiliation',
    'org:member_of', 'per:parents', 'org:dissolved', 'per:schools_attended',
    'per:date_of_death', 'per:date_of_birth', 'per:place_of_birth', 'per:place_of_death',
    'org:founded_by', 'per:religion'
]

@dataclass
class TrainConfig:
    model_name: str = "klue/bert-base"          # BERT / RoBERTa / ELECTRA 모두 OK
    output_dir: str = "./results"
    num_train_epochs: int = 10
    learning_rate: float = 5e-5
    per_device_train_batch_size: int = 16
    per_device_eval_batch_size: int = 16
    warmup_steps: int = 500
    weight_decay: float = 0.01
    logging_steps: int = 100
    save_steps: int = 500
    eval_steps: int = 500
    save_total_limit: int = 5
    load_best_model_at_end: bool = True
    seed: int = 42
    max_length: int = 256
    fp16: bool = False                         # True로 주면 A100/3090 등에서 mixed precision
    special_tokens: Optional[List[str]] = None # 예: ["[E1]","[/E1]","[E2]","[/E2]"]


# ===== Main train function =====
def train_re(
    train_csv: str,
    dev_csv: Optional[str] = None,
    label_list: List[str] = DEFAULT_LABEL_LIST,
    cfg: TrainConfig = TrainConfig(),
    label_map_path: str = 'dict_label_to_num.pkl',
    save_best_to: str = "./best_model",
):
    torch.manual_seed(cfg.seed)
    np.random.seed(cfg.seed)

    # ---- 1) Tokenizer / Model (순서 중요) ----
    tokenizer = AutoTokenizer.from_pretrained(cfg.model_name, use_fast=True)

    # [FIX] 마커 특수토큰을 먼저 추가
    added = 0
    if cfg.special_tokens:
        added = tokenizer.add_special_tokens({"additional_special_tokens": cfg.special_tokens})
        if added > 0:
            print(f"[info] added {added} special tokens")

    num_labels = len(label_list)
    model_config = AutoConfig.from_pretrained(cfg.model_name, num_labels=num_labels)
    model = AutoModelForSequenceClassification.from_pretrained(cfg.model_name, config=model_config)

    # [FIX] 특수토큰 추가했으면 반드시 임베딩 리사이즈
    if added > 0:
        model.resize_token_embeddings(len(tokenizer))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # ---- 2) Load data & label mapping ----
    # train_df = load_data(train_csv)
    full_df = load_data(train_csv)

    if dev_csv is None:
        train_df, dev_df = train_test_split(
            full_df,
            test_size=0.1,
            random_state=cfg.seed,
            stratify=full_df['label'] if 'label' in full_df else None
        )
    else:
        train_df = full_df
        dev_df = load_data(dev_csv)
    label_map = {label: idx for idx, label in enumerate(label_list)}

    # [FIX] 라벨이 리스트 밖이면 KeyError 방지
    try:
        train_y = [label_map[v] for v in train_df['label'].values]
        dev_y   = [label_map[v] for v in dev_df['label'].values]
    except KeyError as e:
        missing = set(train_df['label'].unique()) - set(label_list)
        raise ValueError(f"Found labels not in label_list: {missing}") from e

    # ---- 3) Tokenize (최종 tokenizer로!) ----
    tokenized_train = tokenized_dataset(train_df, tokenizer)
    # [FIX] RoBERTa 호환: token_type_ids 제거(있으면)
    if isinstance(tokenized_train, dict):
        tokenized_train.pop("token_type_ids", None)

    tokenized_train.pop("token_type_ids", None)

    with torch.no_grad():
        emb = model.get_input_embeddings()
        vocab_size = emb.weight.size(0)
        max_id = int(tokenized_train["input_ids"].max().item())
        print(f"[check] vocab_size={vocab_size}, max_input_id={max_id}")

        if max_id >= vocab_size:
            # 디버그: 어떤 토큰들이 범위를 넘는지 확인
            ids = tokenized_train["input_ids"].view(-1)
            bad_ids = ids[ids >= vocab_size].unique().tolist()
            bad_toks = [tokenizer.convert_ids_to_tokens(int(i)) for i in bad_ids]
            print(f"[warn] out-of-vocab ids: {bad_ids}")
            print(f"[warn] out-of-vocab tokens: {bad_toks}")

            # 1) 가장 보수적인 즉시 복구: 임베딩을 입력의 최대 id+1 로 리사이즈
            new_size = max_id + 1
            print(f"[fix] resize embeddings to {new_size}")
            model.resize_token_embeddings(new_size)
            vocab_size = new_size  # 갱신
    RE_train = RE_Dataset(tokenized_train, train_y)
    # ★ dev도 공통 경로에서 생성 (자동 split이든 파일이든 동일 처리)
    tokenized_dev = tokenized_dataset(dev_df, tokenizer)
    tokenized_dev.pop("token_type_ids", None)
    with torch.no_grad():
        max_id_dev = int(tokenized_dev["input_ids"].max().item())
        if max_id_dev >= vocab_size:
            raise RuntimeError(
                f"[dev] Input id ({max_id_dev}) >= embedding size ({vocab_size}). "
                f"Did tokenizer change after tokenizing?"
            )
    RE_dev = RE_Dataset(tokenized_dev, dev_y)

    # if dev_csv is not None:
    #     dev_df = load_data(dev_csv)
    #     try:
    #         dev_y = [label_map[v] for v in dev_df['label'].values]
    #     except KeyError as e:
    #         missing = set(dev_df['label'].unique()) - set(label_list)
    #         raise ValueError(f"[dev] labels not in label_list: {missing}") from e

    #     tokenized_dev = tokenized_dataset(dev_df, tokenizer)
    #     tokenized_dev.pop("token_type_ids", None)
    #     # dev에서도 안전검사(선택)
    #     with torch.no_grad():
    #         max_id_dev = int(tokenized_dev["input_ids"].max().item())
    #         if max_id_dev >= vocab_size:
    #             raise RuntimeError(
    #                 f"[dev] Input id ({max_id_dev}) >= embedding size ({vocab_size}). "
    #                 f"Did tokenizer change after tokenizing?"
    #             )
    #     RE_dev = RE_Dataset(tokenized_dev, dev_y)
    # else:
    #     RE_dev = None

    # ---- 4) TrainingArguments (HF 4.55 API: eval_strategy 사용) ----
    has_dev = RE_dev is not None
    evaluation_strategy = 'steps' if RE_dev is not None else 'no'

    training_args = TrainingArguments(
        output_dir=cfg.output_dir,
        save_total_limit=cfg.save_total_limit,
        save_steps=cfg.save_steps,
        num_train_epochs=cfg.num_train_epochs,
        learning_rate=cfg.learning_rate,
        per_device_train_batch_size=cfg.per_device_train_batch_size,
        per_device_eval_batch_size=cfg.per_device_eval_batch_size,
        warmup_steps=cfg.warmup_steps,
        weight_decay=cfg.weight_decay,
        logging_dir=os.path.join(cfg.output_dir, "logs"),
        logging_steps=cfg.logging_steps,
        logging_strategy="steps",
        eval_strategy=evaluation_strategy,            # ← 이름 주의
        eval_steps=cfg.eval_steps if has_dev else None,
        load_best_model_at_end=cfg.load_best_model_at_end if has_dev else False,
        metric_for_best_model="micro f1 score" if has_dev else None,  # ← EarlyStopping용
        greater_is_better=True,
        seed=cfg.seed,
        fp16=cfg.fp16,
        remove_unused_columns=False,
        dataloader_pin_memory=torch.cuda.is_available(),
        report_to="none",
    )

    # === 5) Trainer ===
    compute_metrics = make_compute_metrics(label_list, no_rel="no_relation") if has_dev else None

    callbacks = []
    if has_dev and training_args.metric_for_best_model and training_args.load_best_model_at_end:
        callbacks.append(EarlyStoppingCallback(
            early_stopping_patience=2,
            early_stopping_threshold=0.002,
        ))
    # dev 없으면 EarlyStopping 미사용
    callbacks.append(PrintEvalF1Callback())
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=RE_train,
        eval_dataset=RE_dev,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        callbacks=callbacks,
    )

    # ---- 6) Train ----
    trainer.train()

    # ---- 7) Save best (or final) model ----
    os.makedirs(save_best_to, exist_ok=True)
    trainer.save_model(save_best_to)
    if trainer.tokenizer is not None:
        trainer.tokenizer.save_pretrained(save_best_to)

    print(f"Model saved to: {save_best_to}")
    return trainer



In [3]:
# grid_runner.py
import os, gc, traceback, json, time
import torch
import pandas as pd
from dataclasses import replace
from typing import List, Dict, Any, Optional
import shutil
# 당신이 제공한 train_re, TrainConfig, DEFAULT_LABEL_LIST 를 import
# from train_module import train_re, TrainConfig, DEFAULT_LABEL_LIST

def run_grid(
    train_csv: str,
    dev_csv: Optional[str],
    models: List[str],
    lrs: List[float] = (5e-5, 3e-5, 2e-5),
    epochs: List[int] = (5, 10),
    train_bsz: List[int] = (16,),
    eval_bsz: List[int] = (16,),
    seed: int = 42,
    base_out: str = "./grid_runs",
    label_list: List[str] = None,
    use_fp16_if_cuda: bool = True,
    special_tokens: Optional[List[str]] = ["[E1]","[/E1]","[E2]","[/E2]",
                                           "[E1-PER]","[E2-PER]","[E1-ORG]","[E2-ORG]",
                                           "[E1-LOC]","[E2-LOC]","[E1-UNK]","[E2-UNK]"],
) -> pd.DataFrame:
    """
    여러 모델/하이퍼파라미터 조합을 순차적으로 학습하고 dev 성능을 수집하여 DataFrame으로 반환.
    실패한 러나는 error 컬럼에 스택트레이스를 남김.
    """
    if label_list is None:
        from __main__ import DEFAULT_LABEL_LIST as _DEF  # 노트북에서 직접 실행 대비
        label_list = _DEF

    os.makedirs(base_out, exist_ok=True)
    results: List[Dict[str, Any]] = []

    combo_idx = 0
    total = len(models) * len(lrs) * len(epochs) * len(train_bsz)
    print(f"[grid] total runs: {total}")

    for model_name in models:
        for lr in lrs:
            for ep in epochs:
                for bsz in train_bsz:
                    combo_idx += 1
                    run_name = f"{model_name.replace('/','_')}_lr{lr:g}_ep{ep}_bs{bsz}"
                    out_dir = os.path.join(base_out, run_name)
                    best_dir = os.path.join(out_dir, "best")

                    print(f"\n[grid {combo_idx}/{total}] {run_name}")

                    # CUDA/메모리 정리
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                    gc.collect()

                    cfg = TrainConfig(
                        model_name=model_name,
                        output_dir=out_dir,
                        num_train_epochs=ep,
                        learning_rate=lr,
                        per_device_train_batch_size=bsz,
                        per_device_eval_batch_size=eval_bsz[0],
                        warmup_steps=0,
                        weight_decay=0.01,
                        logging_steps=500,
                        save_steps=500,
                        eval_steps=500 if dev_csv else None,
                        save_total_limit=3,
                        load_best_model_at_end=bool(dev_csv),
                        seed=seed,
                        max_length=256,
                        fp16=torch.cuda.is_available() and use_fp16_if_cuda,
                        special_tokens=special_tokens,
                    )

                    row: Dict[str, Any] = {
                        "model": model_name, "lr": lr, "epochs": ep, "train_bsz": bsz,
                        "output_dir": out_dir, "best_dir": best_dir,
                        "micro_f1": None, "auprc": None, "accuracy": None,
                        "best_ckpt": None, "error": None, "seconds": None,
                    }

                    t0 = time.time()
                    try:
                        trainer = train_re(
                            train_csv=train_csv,
                            dev_csv=dev_csv,
                            label_list=label_list,
                            cfg=cfg,
                            save_best_to=best_dir,
                        )

                        # dev가 있으면 evaluate로 표준화된 측정
                        if dev_csv:
                            metrics = trainer.evaluate()
                            # 키가 "micro f1 score"로 들어오므로 공백 제거한 alias도 만들어 둠
                            row["micro_f1"] = metrics.get("micro f1 score")
                            row["auprc"]    = metrics.get("auprc")
                            row["accuracy"] = metrics.get("accuracy")
                        else:
                            # dev 없을 경우 마지막 train logs에서 꺼내거나 None
                            pass

                        # best checkpoint 경로
                        state = getattr(trainer, "state", None)
                        row["best_ckpt"] = getattr(state, "best_model_checkpoint", None)

                    except Exception as e:
                        row["error"] = f"{type(e).__name__}: {e}\n" + traceback.format_exc(limit=2)
                        print("[error]", row["error"])
                    finally:
                        row["seconds"] = round(time.time() - t0, 2)
                        results.append(row)

                        # GPU 메모리 정리
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
                        gc.collect()
                        shutil.rmtree(out_dir)
    df = pd.DataFrame(results).sort_values(
        by=["micro_f1", "auprc", "accuracy"], ascending=False, na_position="last"
    ).reset_index(drop=True)

    # CSV로도 저장
    csv_path = os.path.join(base_out, "grid_summary.csv")
    df.to_csv(csv_path, index=False, encoding="utf-8-sig")
    df.to_csv('/content/drive/MyDrive/Colab Notebooks/upstage/model_test_result.csv', mode='a')
    print(f"\n[grid] summary saved: {csv_path}")
    return df




In [4]:
MODELS = [
    # "klue/bert-base",
    # "klue/roberta-base",
    # "monologg/koelectra-base-v3-discriminator",
    "bert-base-multilingual-cased",
    "kykim/bert-kor-base",
    "monologg/koelectra-base-v3",
    "BM-K/KoSimCSE-roberta-multitask"
    # 필요시 추가
]

df = run_grid(
    train_csv="/content/drive/MyDrive/Colab Notebooks/upstage/dataset/train.csv",
    dev_csv=None,     # dev가 없으면 None
    models=MODELS,
    lrs=[5e-5, 3e-5, 2e-5],
    epochs=[5],
    train_bsz=[16, 32],
    base_out="./grid_runs_re",
)
df.to_csv('/content/drive/MyDrive/Colab Notebooks/upstage/model_test_result.csv', mode='a')


[grid] total runs: 24

[grid 1/24] bert-base-multilingual-cased_lr5e-05_ep5_bs16


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

[info] added 12 special tokens


model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


[check] vocab_size=119559, max_input_id=119558


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,2.2091,1.834618,37.996366,20.471117,0.44903
1000,1.7815,1.651807,45.850719,25.12019,0.493379
1500,1.6117,1.577887,49.219099,27.737963,0.52202
2000,1.481,1.495235,50.670365,34.068577,0.536495
2500,1.3772,1.451343,50.07694,35.779648,0.521404
3000,1.3338,1.441789,50.760946,37.802132,0.536495
3500,1.3041,1.367965,53.377894,39.549332,0.561749
4000,1.1525,1.386496,54.329147,40.789425,0.561133
4500,1.0541,1.346083,55.427669,40.575634,0.572528
5000,1.0756,1.348588,55.536481,40.155384,0.565753


[Step 500] F1 = 37.996
[Step 1000] F1 = 45.851
[Step 1500] F1 = 49.219
[Step 2000] F1 = 50.670
[Step 2500] F1 = 50.077
[Step 3000] F1 = 50.761
[Step 3500] F1 = 53.378
[Step 4000] F1 = 54.329
[Step 4500] F1 = 55.428
[Step 5000] F1 = 55.536
[Step 5500] F1 = 56.020
[Step 6000] F1 = 55.055
[Step 6500] F1 = 55.931
[Step 7000] F1 = 55.387
[Step 7500] F1 = 55.820
[Step 8000] F1 = 55.226
[Step 8500] F1 = 55.206
[Step 9000] F1 = 55.665


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/bert-base-multilingual-cased_lr5e-05_ep5_bs16/best

[grid 2/24] bert-base-multilingual-cased_lr5e-05_ep5_bs32
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=119559, max_input_id=119558


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.8951,1.576701,48.984099,29.217525,0.508777
1000,1.4787,1.419982,53.648556,35.642059,0.556822
1500,1.2721,1.382956,53.021978,38.832607,0.553434
2000,1.1669,1.347562,54.740608,41.289547,0.565137
2500,1.0148,1.330359,56.477987,41.674892,0.572528
3000,0.885,1.377592,57.030223,42.38142,0.58454
3500,0.7945,1.343769,57.514075,42.705563,0.590699
4000,0.6591,1.428506,57.161458,43.187611,0.587003
4500,0.618,1.423031,57.22392,42.961372,0.589159


[Step 500] F1 = 48.984
[Step 1000] F1 = 53.649
[Step 1500] F1 = 53.022
[Step 2000] F1 = 54.741
[Step 2500] F1 = 56.478
[Step 3000] F1 = 57.030
[Step 3500] F1 = 57.514
[Step 4000] F1 = 57.161
[Step 4500] F1 = 57.224


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/bert-base-multilingual-cased_lr5e-05_ep5_bs32/best

[grid 3/24] bert-base-multilingual-cased_lr3e-05_ep5_bs16
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=119559, max_input_id=119558


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,2.0977,1.793607,40.030519,22.642046,0.467816
1000,1.6668,1.532431,49.776186,30.237181,0.519557
1500,1.5198,1.459476,52.585451,33.828823,0.543579
2000,1.39,1.403693,51.676352,36.978905,0.542347
2500,1.2839,1.384544,52.103121,39.619926,0.54635
3000,1.2333,1.353218,54.272727,40.546791,0.562673
3500,1.2252,1.309087,55.871886,41.591854,0.57684
4000,1.0577,1.372836,54.929577,41.322655,0.559593
4500,0.9799,1.322094,56.303442,42.951202,0.579612
5000,1.0015,1.32078,56.227456,43.783352,0.574376


[Step 500] F1 = 40.031
[Step 1000] F1 = 49.776
[Step 1500] F1 = 52.585
[Step 2000] F1 = 51.676
[Step 2500] F1 = 52.103
[Step 3000] F1 = 54.273
[Step 3500] F1 = 55.872
[Step 4000] F1 = 54.930
[Step 4500] F1 = 56.303
[Step 5000] F1 = 56.227
[Step 5500] F1 = 56.753
[Step 6000] F1 = 56.598
[Step 6500] F1 = 56.336
[Step 7000] F1 = 56.727
[Step 7500] F1 = 57.358
[Step 8000] F1 = 56.134
[Step 8500] F1 = 56.702
[Step 9000] F1 = 56.647


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/bert-base-multilingual-cased_lr3e-05_ep5_bs16/best

[grid 4/24] bert-base-multilingual-cased_lr3e-05_ep5_bs32
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=119559, max_input_id=119558


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.9204,1.583471,48.687043,29.284107,0.502002
1000,1.4686,1.389013,53.698122,36.183041,0.560517
1500,1.2614,1.355457,54.819552,41.075401,0.569757
2000,1.1639,1.321458,56.256921,41.95834,0.579304
2500,1.0238,1.312771,57.348581,43.397778,0.581152
3000,0.9073,1.340821,57.149104,43.526966,0.584848
3500,0.8366,1.328364,56.989016,42.749771,0.583
4000,0.7244,1.37965,57.279029,43.160412,0.587003
4500,0.6911,1.373147,57.086786,43.65115,0.586079


[Step 500] F1 = 48.687
[Step 1000] F1 = 53.698
[Step 1500] F1 = 54.820
[Step 2000] F1 = 56.257
[Step 2500] F1 = 57.349
[Step 3000] F1 = 57.149
[Step 3500] F1 = 56.989
[Step 4000] F1 = 57.279
[Step 4500] F1 = 57.087


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/bert-base-multilingual-cased_lr3e-05_ep5_bs32/best

[grid 5/24] bert-base-multilingual-cased_lr2e-05_ep5_bs16
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=119559, max_input_id=119558


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,2.1419,1.792337,40.820621,22.879948,0.474592
1000,1.6791,1.559504,49.595805,29.429262,0.514629
1500,1.5303,1.481071,51.427287,32.283531,0.538035
2000,1.3958,1.386866,52.409091,38.062431,0.547274
2500,1.2904,1.363956,53.213958,40.449387,0.553742
3000,1.2385,1.350881,54.0209,41.756233,0.559901
3500,1.232,1.301736,56.119937,41.781757,0.578072
4000,1.0911,1.33041,56.032458,42.565502,0.568525
4500,1.0097,1.313182,56.540084,43.025963,0.585464
5000,1.0316,1.311766,57.003188,42.782982,0.57992


[Step 500] F1 = 40.821
[Step 1000] F1 = 49.596
[Step 1500] F1 = 51.427
[Step 2000] F1 = 52.409
[Step 2500] F1 = 53.214
[Step 3000] F1 = 54.021
[Step 3500] F1 = 56.120
[Step 4000] F1 = 56.032
[Step 4500] F1 = 56.540
[Step 5000] F1 = 57.003
[Step 5500] F1 = 56.930
[Step 6000] F1 = 56.574
[Step 6500] F1 = 56.492
[Step 7000] F1 = 57.778
[Step 7500] F1 = 57.560
[Step 8000] F1 = 57.025
[Step 8500] F1 = 57.023
[Step 9000] F1 = 57.243


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/bert-base-multilingual-cased_lr2e-05_ep5_bs16/best

[grid 6/24] bert-base-multilingual-cased_lr2e-05_ep5_bs32
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=119559, max_input_id=119558


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.9908,1.668764,45.741879,26.428545,0.480751
1000,1.515,1.424898,52.885052,35.006413,0.553126
1500,1.3056,1.371129,54.286361,39.909363,0.565445
2000,1.2095,1.326315,56.12542,41.569946,0.578996
2500,1.0817,1.331069,56.593407,43.347759,0.574068
3000,0.993,1.313676,56.866197,43.35445,0.58454
3500,0.928,1.301,57.284911,44.018826,0.585771
4000,0.8348,1.325647,57.615318,44.937878,0.590699
4500,0.8109,1.326897,57.956522,44.62289,0.593163


[Step 500] F1 = 45.742
[Step 1000] F1 = 52.885
[Step 1500] F1 = 54.286
[Step 2000] F1 = 56.125
[Step 2500] F1 = 56.593
[Step 3000] F1 = 56.866
[Step 3500] F1 = 57.285
[Step 4000] F1 = 57.615
[Step 4500] F1 = 57.957


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/bert-base-multilingual-cased_lr2e-05_ep5_bs32/best

[grid 7/24] kykim_bert-kor-base_lr5e-05_ep5_bs16


tokenizer_config.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

[info] added 12 special tokens


pytorch_model.bin:   0%|          | 0.00/476M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kykim/bert-kor-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/476M [00:00<?, ?B/s]

[check] vocab_size=42012, max_input_id=42011


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.8882,1.580265,48.948882,28.680882,0.504466
1000,1.523,1.409328,53.533569,36.148712,0.544503
1500,1.4084,1.374202,53.497409,38.163471,0.547582
2000,1.2635,1.335663,55.903614,43.221474,0.570373
2500,1.1281,1.291334,55.250404,42.734081,0.5753
3000,1.0896,1.323989,56.052061,43.764256,0.569141
3500,1.0972,1.245822,56.991852,45.297676,0.585156
4000,0.8982,1.3245,58.538658,45.330872,0.591007
4500,0.8059,1.27982,59.288365,47.052704,0.605482
5000,0.8128,1.309303,57.953823,47.157738,0.586079


[Step 500] F1 = 48.949
[Step 1000] F1 = 53.534
[Step 1500] F1 = 53.497
[Step 2000] F1 = 55.904
[Step 2500] F1 = 55.250
[Step 3000] F1 = 56.052
[Step 3500] F1 = 56.992
[Step 4000] F1 = 58.539
[Step 4500] F1 = 59.288
[Step 5000] F1 = 57.954
[Step 5500] F1 = 58.679
[Step 6000] F1 = 58.033
[Step 6500] F1 = 58.348
[Step 7000] F1 = 58.215
[Step 7500] F1 = 58.463
[Step 8000] F1 = 58.278
[Step 8500] F1 = 58.475
[Step 9000] F1 = 58.588


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/kykim_bert-kor-base_lr5e-05_ep5_bs16/best

[grid 8/24] kykim_bert-kor-base_lr5e-05_ep5_bs32
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kykim/bert-kor-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=42012, max_input_id=42011


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.7381,1.451286,52.8984,34.695683,0.530028
1000,1.3358,1.309472,56.234498,41.958435,0.582692
1500,1.1056,1.315757,56.356026,44.121178,0.579304
2000,0.9953,1.304519,57.68384,45.39793,0.593779
2500,0.8167,1.304483,58.378604,46.120384,0.588543
3000,0.6962,1.372425,57.90501,45.659428,0.595011
3500,0.6061,1.369532,59.105638,44.415985,0.606098
4000,0.4806,1.473555,58.19209,43.783123,0.598706
4500,0.4419,1.471573,58.253692,44.277004,0.600862


[Step 500] F1 = 52.898
[Step 1000] F1 = 56.234
[Step 1500] F1 = 56.356
[Step 2000] F1 = 57.684
[Step 2500] F1 = 58.379
[Step 3000] F1 = 57.905
[Step 3500] F1 = 59.106
[Step 4000] F1 = 58.192
[Step 4500] F1 = 58.254


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/kykim_bert-kor-base_lr5e-05_ep5_bs32/best

[grid 9/24] kykim_bert-kor-base_lr3e-05_ep5_bs16
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kykim/bert-kor-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=42012, max_input_id=42011


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.9426,1.574694,47.319347,28.784033,0.504466
1000,1.5198,1.425358,51.967494,35.348969,0.526948
1500,1.3982,1.38367,54.22301,37.340973,0.55713
2000,1.2667,1.314726,55.521739,42.697868,0.570989
2500,1.1293,1.287769,55.351057,44.811203,0.574684
3000,1.0949,1.312289,55.467372,44.008236,0.574376
3500,1.0926,1.245001,57.403696,45.613354,0.591623
4000,0.9196,1.303511,58.646934,45.959263,0.593779
4500,0.8276,1.268549,59.174903,45.534821,0.60271
5000,0.8441,1.296022,58.228923,45.948402,0.589159


[Step 500] F1 = 47.319
[Step 1000] F1 = 51.967
[Step 1500] F1 = 54.223
[Step 2000] F1 = 55.522
[Step 2500] F1 = 55.351
[Step 3000] F1 = 55.467
[Step 3500] F1 = 57.404
[Step 4000] F1 = 58.647
[Step 4500] F1 = 59.175
[Step 5000] F1 = 58.229
[Step 5500] F1 = 58.346
[Step 6000] F1 = 58.266
[Step 6500] F1 = 57.292
[Step 7000] F1 = 57.599
[Step 7500] F1 = 58.219
[Step 8000] F1 = 57.897
[Step 8500] F1 = 57.797
[Step 9000] F1 = 58.282


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/kykim_bert-kor-base_lr3e-05_ep5_bs16/best

[grid 10/24] kykim_bert-kor-base_lr3e-05_ep5_bs32
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kykim/bert-kor-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=42012, max_input_id=42011


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.7921,1.514518,51.122907,31.8225,0.512165
1000,1.3548,1.312245,56.071829,41.200527,0.579612
1500,1.1473,1.302477,57.449606,44.862511,0.587311
2000,1.0532,1.279467,57.34327,45.094879,0.586695
2500,0.8899,1.278311,58.164983,46.775345,0.590083
3000,0.7901,1.32928,58.498368,45.341362,0.599322
3500,0.7152,1.308512,58.292106,44.712456,0.592855
4000,0.6106,1.371919,58.62143,44.701168,0.601478
4500,0.5824,1.366687,58.13253,44.953255,0.596243


[Step 500] F1 = 51.123
[Step 1000] F1 = 56.072
[Step 1500] F1 = 57.450
[Step 2000] F1 = 57.343
[Step 2500] F1 = 58.165
[Step 3000] F1 = 58.498
[Step 3500] F1 = 58.292
[Step 4000] F1 = 58.621
[Step 4500] F1 = 58.133


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/kykim_bert-kor-base_lr3e-05_ep5_bs32/best

[grid 11/24] kykim_bert-kor-base_lr2e-05_ep5_bs16
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kykim/bert-kor-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=42012, max_input_id=42011


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,2.0219,1.652164,45.394737,27.454641,0.491839
1000,1.5573,1.46888,51.91164,33.023174,0.527872
1500,1.4219,1.405596,53.521127,35.9837,0.554974
2000,1.3009,1.322583,55.604396,41.905956,0.573452
2500,1.1735,1.291937,55.856266,43.35899,0.57992
3000,1.1342,1.309063,56.74786,44.329441,0.580536
3500,1.1299,1.255774,57.662453,45.056618,0.592547
4000,0.9836,1.288626,58.520085,45.862011,0.588235
4500,0.8955,1.26324,58.749457,45.840157,0.59963
5000,0.9086,1.280806,59.714645,47.108181,0.595627


[Step 500] F1 = 45.395
[Step 1000] F1 = 51.912
[Step 1500] F1 = 53.521
[Step 2000] F1 = 55.604
[Step 2500] F1 = 55.856
[Step 3000] F1 = 56.748
[Step 3500] F1 = 57.662
[Step 4000] F1 = 58.520
[Step 4500] F1 = 58.749
[Step 5000] F1 = 59.715
[Step 5500] F1 = 58.803
[Step 6000] F1 = 58.968
[Step 6500] F1 = 58.559
[Step 7000] F1 = 58.046
[Step 7500] F1 = 58.776
[Step 8000] F1 = 58.456
[Step 8500] F1 = 58.524
[Step 9000] F1 = 58.918


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/kykim_bert-kor-base_lr2e-05_ep5_bs16/best

[grid 12/24] kykim_bert-kor-base_lr2e-05_ep5_bs32
[info] added 12 special tokens


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kykim/bert-kor-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[check] vocab_size=42012, max_input_id=42011


  trainer = Trainer(


Step,Training Loss,Validation Loss,Micro f1 score,Auprc,Accuracy
500,1.8776,1.55314,50.272537,30.529871,0.509085
1000,1.4043,1.340672,55.034148,39.875877,0.568217
1500,1.2114,1.314952,56.502441,42.493217,0.57992
2000,1.1252,1.283388,57.098353,44.001396,0.586387
2500,0.9877,1.279668,58.27674,46.200451,0.592239
3000,0.9035,1.302569,57.729682,45.486389,0.594087
3500,0.8429,1.278867,57.934292,45.567768,0.591623
4000,0.7594,1.309646,57.883369,45.086895,0.594087
4500,0.7324,1.304445,57.768147,46.375031,0.593163


[Step 500] F1 = 50.273
[Step 1000] F1 = 55.034
[Step 1500] F1 = 56.502
[Step 2000] F1 = 57.098
[Step 2500] F1 = 58.277
[Step 3000] F1 = 57.730
[Step 3500] F1 = 57.934
[Step 4000] F1 = 57.883
[Step 4500] F1 = 57.768


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Model saved to: ./grid_runs_re/kykim_bert-kor-base_lr2e-05_ep5_bs32/best

[grid 13/24] monologg_koelectra-base-v3_lr5e-05_ep5_bs16
[error] OSError: monologg/koelectra-base-v3 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=<your_token>`
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_http.py", line 409, in hf_raise_for_status
    response.raise_for_status()
  File "/usr/local/lib/python3.11/dist-packages/requests/models.py", line 1024, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/monologg/koelectra-base-v3/resolve/main/tokenizer_config.json

The above exception was the direct cause of the following exception:


FileNotFoundError: [Errno 2] No such file or directory: './grid_runs_re/monologg_koelectra-base-v3_lr5e-05_ep5_bs16'

| Run                                   | Best Step | Micro F1  | AUPRC     | Accuracy | Val Loss |
|---------------------------------------|-----------|-----------|-----------|----------|----------|
| klue_bert-base_lr3e-05_ep10_bs16      | 4500      | 59.8642   | 47.9678   | 0.613181 | 1.274938 |
| klue_bert-base_lr3e-05_ep5_bs16       | 4000      | 59.7681   | 47.1765   | 0.606098 | 1.287195 |
| klue_bert-base_lr5e-05_ep5_bs16       | 7000      | 59.6718   | 47.4565   | 0.609794 | 1.430602 |
| klue_bert-base_lr3e-05_ep5_bs32       | 3000      | 59.4809   | 46.9674   | 0.607330 | 1.329728 |
| klue_bert-base_lr5e-05_ep5_bs32       | 3500      | 59.4452   | 48.2639   | 0.608870 | 1.387745 |
| klue_bert-base_lr5e-05_ep10_bs16      | 5000      | 59.6273   | 47.5093   | 0.604866 | 1.282922 |
| klue_bert-base_lr5e-05_ep10_bs32      | 2000      | 59.3649   | 47.0007   | 0.602402 | 1.327961 |
| klue_roberta-base_lr5e-05_ep5_bs16                         | 7500      | 59.6945   | 47.8759   | 0.604866 | 1.481126 |
| klue_roberta-base_lr5e-05_ep5_bs32                         | 4500      | 59.8912   | 47.5600   | 0.614721 | 1.428611 |
| klue_roberta-base_lr3e-05_ep5_bs16                         | 7500      | 60.2211   | 46.0446   | 0.615953 | 1.460129 |
| klue_roberta-base_lr3e-05_ep5_bs32                         | 3500      | 60.3333   | 47.9823   | 0.615953 | 1.291947 |
| klue_roberta-base_lr2e-05_ep5_bs16                         | 8000      | 60.6296   | 48.4109   | 0.618417 | 1.374527 |
| klue_roberta-base_lr2e-05_ep5_bs32                         | 4000      | 58.8595   | 48.3168   | 0.607022 | 1.319022 |
| monologg_koelectra-base-v3-discriminator_lr5e-05_ep5_bs16  | 6000      | 59.3478   | 41.7290   | 0.604558 | 1.384946 |
| monologg_koelectra-base-v3-discriminator_lr5e-05_ep5_bs32  | 3000      | 59.0235   | 39.5930   | 0.607330 | 1.359320 |
| monologg_koelectra-base-v3-discriminator_lr3e-05_ep5_bs16  | 8000      | 59.6228   | 39.8815   | 0.607946 | 1.417223 |
| monologg_koelectra-base-v3-discriminator_lr3e-05_ep5_bs32  | 3000      | 57.6611   | 34.6298   | 0.588235 | 1.338130 |
| bert-base-multilingual-cased_lr5e-05_ep5_bs16     | 5500      | 56.020   | 41.428  | 0.579920 | 1.363017 |
| bert-base-multilingual-cased_lr5e-05_ep5_bs32     | 3500      | 57.514   | 42.706  | 0.590699 | 1.343769 |
| bert-base-multilingual-cased_lr3e-05_ep5_bs16     | 7500      | 57.358   | 43.130  | 0.586387 | 1.438589 |
| bert-base-multilingual-cased_lr3e-05_ep5_bs32     | 2500      | 57.349   | 43.398  | 0.581152 | 1.312771 |
| bert-base-multilingual-cased_lr2e-05_ep5_bs16     | 7000      | 57.778   | 43.670  | 0.592239 | 1.322330 |
| bert-base-multilingual-cased_lr2e-05_ep5_bs32     | 4500      | 57.957   | 44.623  | 0.593163 | 1.326897 |
| kykim_bert-kor-base_lr5e-05_ep5_bs16              | 4500      | 59.288   | 47.053  | 0.605482 | 1.279820 |
| kykim_bert-kor-base_lr5e-05_ep5_bs32              | 3500      | 59.106   | 44.416  | 0.606098 | 1.369532 |
| kykim_bert-kor-base_lr3e-05_ep5_bs16              | 4500      | 59.175   | 45.535  | 0.602710 | 1.268549 |
| kykim_bert-kor-base_lr3e-05_ep5_bs32              | 4000      | 58.621   | 44.701  | 0.601478 | 1.371919 |
| kykim_bert-kor-base_lr2e-05_ep5_bs16              | 5000      | 59.715   | 47.108  | 0.595627 | 1.280806 |
| kykim_bert-kor-base_lr2e-05_ep5_bs32              | 2500      | 58.277   | 46.200  | 0.592239 | 1.279668 |



    데이터 전처리

    preprocessing_dataset()에서 subject_entity, object_entity 컬럼의 문자열(dict-like)에서 word/type을 파싱해 사용 가능한 형태로 정리.

    엔티티 마커 기반 토크나이즈

    tokenized_dataset()에서 문장 앞에
    "[E1-{TYPE}]{subj}[/E1] [E2-{TYPE}]{obj}[/E2]"
    방식의 타입 포함 마커(PER/ORG/LOC/UNK) 를 주입.

    토크나이저에 아래 특수토큰 추가:
    "[E1]","[/E1]","[E2]","[/E2]","[E1-PER]","[E2-PER]","[E1-ORG]","[E2-ORG]","[E1-LOC]","[E2-LOC]","[E1-UNK]","[E2-UNK]"

    토크나이저/모델 정합

    특수토큰 추가 후 model.resize_token_embeddings(len(tokenizer))로 임베딩 크기 리사이즈(OOV 방지).

    RoBERTa 계열 호환 위해 token_type_ids 제거.

    max_length=256.

    학습 설정

    모델: klue/roberta-base

    하이퍼파라미터(베스트 러닝): lr=2e-5, epochs=5, batch_size=16

    weight decay 0.01, warmup 0, seed 42, (CUDA면) fp16 사용.

    dev가 없으면 eval_strategy='no'로 안전하게 세팅(에러 회피).

    결과/체크포인트 경로 분리 저장.

    지표

    micro_f1_wo_no_relation: no_relation 제외 마이크로 F1

    auprc_all: 클래스별 PR AUC 평균

    accuracy 기본 정확도

    (dev가 있을 때는 EarlyStopping/load_best_model_at_end 활성)

    그리드 실행

    run_grid()로 모델×lr×epochs×batch_size 조합을 순차 실행, 성능/시간/에러를 데이터프레임으로 수집 및 CSV 저장.