# FULL PIPELINE (COLAB) — HOLDOUT + KFOLD + TOKENIZATION CHECK

In [None]:
# ============================================================
# Models: IndoBERT, IndoBERTweet, IndoRoBERTa
# Regularization: Dropout (head) + L2 (AdamW weight_decay) + EarlyStopping
# Save models to:
# /content/drive/MyDrive/SKRIPSI/models/<MODEL_TAG>/
#   ├── holdout/<SCHEME>/
#   └── kfold/k<K>/fold<F>/
# ============================================================

# ---------- 0) Imports ----------
import os, time, json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import (
    AutoTokenizer, AutoModel, AutoConfig,
    Trainer, TrainingArguments, EarlyStoppingCallback
)
from transformers.modeling_outputs import SequenceClassifierOutput

In [1]:
# ===== 1) Mount Google Drive =====
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# ---------- 2) Paths ----------
SPLIT_HOLDOUT_ROOT = "/content/drive/MyDrive/SKRIPSI/split/HO"
SPLIT_KFOLD_ROOT   = "/content/drive/MyDrive/SKRIPSI/split/K"
MODELS_ROOT        = "/content/drive/MyDrive/SKRIPSI/models"
RESULTS_ROOT       = "/content/drive/MyDrive/SKRIPSI/results"

os.makedirs(MODELS_ROOT, exist_ok=True)
os.makedirs(RESULTS_ROOT, exist_ok=True)

In [None]:
# ---------- 3) Dataset columns ----------
TEXT_COL  = "text"
LABEL_COL = "label_id"
NUM_LABELS = 6

In [None]:
# ---------- 4) Model Zoo ----------
MODEL_ZOO = {
    "IndoBERT":     "indobenchmark/indobert-base-p1",
    "IndoBERTweet": "indolem/indobertweet-base-uncased",
    "IndoRoBERTa":  "cahya/roberta-base-indonesian-522M",
}

## Check PATH Datasets

In [None]:
import os

HO_ROOT = "/content/drive/MyDrive/SKRIPSI/split/HO"
folder_ho = ["HO_80_10_10", "HO_70_15_15", "HO_90_05_05"]
schemes   = ["80_10_10", "70_15_15", "90_05_05"]   # nama file

print("===== CHECK HOLDOUT FILES (folder HO_xxx + file train_xxx.csv) =====")
print("HO root exists ->", os.path.exists(HO_ROOT))

for folder_name, scheme in zip(folder_ho, schemes):
    sc_dir = os.path.join(HO_ROOT, folder_name)

    print(f"\nChecking folder: {folder_name} | files: *_{scheme}.csv")
    print(" folder ->", os.path.exists(sc_dir))

    train_path = os.path.join(sc_dir, f"train_{scheme}.csv")
    val_path   = os.path.join(sc_dir, f"val_{scheme}.csv")
    test_path  = os.path.join(sc_dir, f"test_{scheme}.csv")

    print(" train ->", os.path.exists(train_path))
    print(" val   ->", os.path.exists(val_path))
    print(" test  ->", os.path.exists(test_path))

===== CHECK HOLDOUT FILES (folder HO_xxx + file train_xxx.csv) =====
HO root exists -> True

Checking folder: HO_80_10_10 | files: *_80_10_10.csv
 folder -> True
 train -> True
 val   -> True
 test  -> True

Checking folder: HO_70_15_15 | files: *_70_15_15.csv
 folder -> True
 train -> True
 val   -> True
 test  -> True

Checking folder: HO_90_05_05 | files: *_90_05_05.csv
 folder -> True
 train -> True
 val   -> True
 test  -> True


In [None]:
SPLIT_KFOLD_ROOT = "/content/drive/MyDrive/SKRIPSI/split/K"

print("\n===== CHECK KFOLD FILES =====")

for k in range(2, 11):
    k_dir = os.path.join(SPLIT_KFOLD_ROOT, f"k{k}")

    print(f"\nChecking k{k}")
    print("folder exists ->", os.path.exists(k_dir))

    if os.path.exists(k_dir):
        for fold in range(1, k + 1):
            train_path = os.path.join(k_dir, f"train_fold{fold}.csv")
            val_path   = os.path.join(k_dir, f"val_fold{fold}.csv")

            print(f" fold{fold} train ->", os.path.exists(train_path))
            print(f" fold{fold} val   ->", os.path.exists(val_path))


===== CHECK KFOLD FILES =====

Checking k2
folder exists -> True
 fold1 train -> True
 fold1 val   -> True
 fold2 train -> True
 fold2 val   -> True

Checking k3
folder exists -> True
 fold1 train -> True
 fold1 val   -> True
 fold2 train -> True
 fold2 val   -> True
 fold3 train -> True
 fold3 val   -> True

Checking k4
folder exists -> True
 fold1 train -> True
 fold1 val   -> True
 fold2 train -> True
 fold2 val   -> True
 fold3 train -> True
 fold3 val   -> True
 fold4 train -> True
 fold4 val   -> True

Checking k5
folder exists -> True
 fold1 train -> True
 fold1 val   -> True
 fold2 train -> True
 fold2 val   -> True
 fold3 train -> True
 fold3 val   -> True
 fold4 train -> True
 fold4 val   -> True
 fold5 train -> True
 fold5 val   -> True

Checking k6
folder exists -> True
 fold1 train -> True
 fold1 val   -> True
 fold2 train -> True
 fold2 val   -> True
 fold3 train -> True
 fold3 val   -> True
 fold4 train -> True
 fold4 val   -> True
 fold5 train -> True
 fold5 val   -> T

## Define Function

In [None]:
# ===== 5) Hyperparameters  =====
MAX_LEN = 128
LR = 2e-5
BATCH_SIZE = 16
EPOCHS = 20                 # maksimum; early stopping bisa stop lebih cepat
DROPOUT_RATE = 0.3
WEIGHT_DECAY = 0.01         # L2/weight decay
EARLY_PATIENCE = 2
EARLY_THRESHOLD = 0.0
SEED = 42


In [None]:
# ---------- 6) Dataset class ----------
class TextClsDataset(torch.utils.data.Dataset):
    def __init__(self, df: pd.DataFrame, tokenizer, max_len: int = 128):
        self.texts = df[TEXT_COL].astype(str).tolist()
        self.labels = df[LABEL_COL].astype(int).tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self): return len(self.texts)

    def __getitem__(self, idx: int):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

In [None]:
# ---------- 7) Custom model: Dropout head ----------
class TransformerWithDropoutHead(nn.Module):
    def __init__(self, model_name: str, num_labels: int = 6, dropout_rate: float = 0.3):
        super().__init__()
        config = AutoConfig.from_pretrained(model_name)
        # internal dropout (kalau didukung)
        setattr(config, "hidden_dropout_prob", dropout_rate)
        setattr(config, "attention_probs_dropout_prob", dropout_rate)

        self.backbone = AutoModel.from_pretrained(model_name, config=config)
        hidden_size = self.backbone.config.hidden_size

        self.dropout = nn.Dropout(dropout_rate)  # dropout tambahan di head
        self.classifier = nn.Linear(hidden_size, num_labels)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, labels=None, **kwargs):
        forward_args = {"input_ids": input_ids, "attention_mask": attention_mask}
        # RoBERTa-like biasanya tidak pakai token_type_ids
        if "token_type_ids" in self.backbone.forward.__code__.co_varnames and token_type_ids is not None:
            forward_args["token_type_ids"] = token_type_ids

        outputs = self.backbone(**forward_args)
        cls = outputs.last_hidden_state[:, 0, :]
        x = self.dropout(cls)
        logits = self.classifier(x)

        loss = None
        if labels is not None:
            loss = self.loss_fn(logits, labels)

        return SequenceClassifierOutput(loss=loss, logits=logits)

In [None]:
# ---------- 8) Metrics ----------
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    acc = accuracy_score(labels, preds)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, preds, average="macro", zero_division=0)
    return {"accuracy": acc, "precision_macro": prec, "recall_macro": rec, "f1_macro": f1}

In [None]:
# ---------- 9) Tokenization sanity check ----------
def check_tokenization_for_all_models(sample_text: str, max_len: int = 128):
    """
    Cek tokenisasi untuk 3 model:
    - input_ids shape
    - attention_mask shape
    - forward pass backbone (opsional tapi bagus)
    """
    print("\n================ TOKENIZATION CHECK ================")
    print("Sample text:", sample_text[:200], "..." if len(sample_text) > 200 else "")
    for tag, model_name in MODEL_ZOO.items():
        print(f"\n--- {tag} ({model_name}) ---")
        tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        enc = tok(
            sample_text,
            truncation=True,
            padding="max_length",
            max_length=max_len,
            return_tensors="pt"
        )
        print("input_ids:", tuple(enc["input_ids"].shape))
        print("attention_mask:", tuple(enc["attention_mask"].shape))

        # forward pass cepat (tanpa grad)
        backbone = AutoModel.from_pretrained(model_name)
        backbone.eval()
        with torch.no_grad():
            out = backbone(**enc)
        print("last_hidden_state:", tuple(out.last_hidden_state.shape))
    print("====================================================\n")

In [None]:
# ---------- 10) Train 1 run (model + 1 split) ----------
def train_one_run(
    model_tag: str,
    hf_model_name: str,
    train_df: pd.DataFrame,
    val_df: pd.DataFrame,
    out_dir: str,
):
    os.makedirs(out_dir, exist_ok=True)

    tokenizer = AutoTokenizer.from_pretrained(hf_model_name, use_fast=True)
    train_ds = TextClsDataset(train_df, tokenizer, max_len=MAX_LEN)
    val_ds   = TextClsDataset(val_df, tokenizer, max_len=MAX_LEN)

    model = TransformerWithDropoutHead(hf_model_name, num_labels=NUM_LABELS, dropout_rate=DROPOUT_RATE)

    args = TrainingArguments(
        output_dir=out_dir,
        run_name=os.path.basename(out_dir),
        learning_rate=LR,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        num_train_epochs=EPOCHS,
        weight_decay=WEIGHT_DECAY,
        eval_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="f1_macro",
        greater_is_better=True,
        logging_strategy="epoch",
        fp16=torch.cuda.is_available(),
        report_to="none",
        seed=SEED
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(
            early_stopping_patience=EARLY_PATIENCE,
            early_stopping_threshold=EARLY_THRESHOLD
        )]
    )

    t0 = time.time()
    train_result = trainer.train()
    t1 = time.time()
    eval_result = trainer.evaluate()

    # ===== SIMPAN LOG TRAINING =====
    log_history = trainer.state.log_history
    pd.DataFrame(log_history).to_csv(
      os.path.join(out_dir, "log_history.csv"),
      index=False
    )

    # ===== SIMPAN PREDIKSI VALIDATION =====
    pred_output = trainer.predict(val_ds)

    val_preds = np.argmax(pred_output.predictions, axis=1)
    val_labels = pred_output.label_ids

    pd.DataFrame({
        "true_label": val_labels,
        "pred_label": val_preds
    }).to_csv(
        os.path.join(out_dir, "val_predictions.csv"),
        index=False
    )

    trainer.save_model(out_dir)
    tokenizer.save_pretrained(out_dir)

    summary = {
        "model_tag": model_tag,
        "hf_model_name": hf_model_name,
        "out_dir": out_dir,
        "train_time_sec": round(t1 - t0, 2),
        "train_metrics": {k: float(v) for k, v in train_result.metrics.items()},
        "eval_metrics": {k: float(v) for k, v in eval_result.items()},
        "hyperparams": {
            "max_len": MAX_LEN,
            "lr": LR,
            "batch_size": BATCH_SIZE,
            "epochs_max": EPOCHS,
            "dropout_rate": DROPOUT_RATE,
            "weight_decay": WEIGHT_DECAY,
            "early_patience": EARLY_PATIENCE,
            "early_threshold": EARLY_THRESHOLD
        }
    }
    with open(os.path.join(out_dir, "summary.json"), "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)

    return summary

# Hold Out Schemes

In [None]:
def run_holdout_training():
    HO_ROOT = "/content/drive/MyDrive/SKRIPSI/split/HO"

    folder_ho = ["HO_80_10_10", "HO_70_15_15", "HO_90_05_05"]
    schemes   = ["80_10_10", "70_15_15", "90_05_05"]

    rows = []

    for folder_name, scheme in zip(folder_ho, schemes):
        sc_dir = os.path.join(HO_ROOT, folder_name)

        train_path = os.path.join(sc_dir, f"train_{scheme}.csv")
        val_path   = os.path.join(sc_dir, f"val_{scheme}.csv")

        if not (os.path.exists(train_path) and os.path.exists(val_path)):
            print(f"[HOLDOUT SKIP] File tidak lengkap untuk {folder_name}")
            continue

        train_df = pd.read_csv(train_path)
        val_df   = pd.read_csv(val_path)

        # cek kolom
        if TEXT_COL not in train_df.columns or LABEL_COL not in train_df.columns:
            raise ValueError(f"Kolom {TEXT_COL}/{LABEL_COL} tidak ada di {train_path}")
        if TEXT_COL not in val_df.columns or LABEL_COL not in val_df.columns:
            raise ValueError(f"Kolom {TEXT_COL}/{LABEL_COL} tidak ada di {val_path}")

        print(f"\n===== HOLDOUT TRAINING: {folder_name} =====")

        # 🔥 train 3 model
        for model_tag, hf_name in MODEL_ZOO.items():

            out_dir = os.path.join(
                MODELS_ROOT,
                model_tag,
                "holdout",
                folder_name   # pakai nama folder biar rapi
            )

            print(f"--- Training {model_tag} ---")

            summary = train_one_run(
                model_tag,
                hf_name,
                train_df,
                val_df,
                out_dir
            )

            ev = summary["eval_metrics"]

            rows.append({
                "split_type": "holdout",
                "scheme": folder_name,
                "model": model_tag,
                "train_time_sec": summary["train_time_sec"],
                "eval_loss": ev.get("eval_loss"),
                "eval_accuracy": ev.get("eval_accuracy"),
                "eval_precision_macro": ev.get("eval_precision_macro"),
                "eval_recall_macro": ev.get("eval_recall_macro"),
                "eval_f1_macro": ev.get("eval_f1_macro"),
                "saved_to": summary["out_dir"]
            })

    out_csv = os.path.join(RESULTS_ROOT, "results_holdout_final.csv")
    pd.DataFrame(rows).to_csv(out_csv, index=False)

    print("\n[SUKSES] Holdout selesai.")
    print("Rekap:", out_csv)

    return pd.DataFrame(rows)

# K-FOLD Schemes

In [None]:
# ---------- 12) K-FOLD TRAINING ----------
def run_kfold_training(k_values=range(2, 11), token_check_fold=1):
    """
    Expect folder:
      /split/K/k2/train_fold1.csv, val_fold1.csv, ..., train_fold2.csv...
    Tokenization check:
      - per k (default hanya fold 1 biar tidak berat) => token_check_fold=1
    Save:
      /models/<ModelTag>/kfold/k2/fold1/
    """
    rows = []

    for k in k_values:
        k_dir = os.path.join(SPLIT_KFOLD_ROOT, f"k{k}")
        if not os.path.isdir(k_dir):
            print(f"[KFOLD SKIP] Folder tidak ditemukan: {k_dir}")
            continue

        train_files = sorted([f for f in os.listdir(k_dir) if f.startswith("train_fold") and f.endswith(".csv")])
        num_folds = len(train_files)
        if num_folds == 0:
            print(f"[KFOLD SKIP] Tidak ada train_fold*.csv di {k_dir}")
            continue

        # ✅ tokenisasi check per k (ambil fold tertentu saja biar ringan)
        chk_fold = token_check_fold
        chk_train_path = os.path.join(k_dir, f"train_fold{chk_fold}.csv")
        if os.path.exists(chk_train_path):
            chk_train_df = pd.read_csv(chk_train_path)
            if TEXT_COL not in chk_train_df.columns:
                raise ValueError(f"Kolom {TEXT_COL} tidak ada di {chk_train_path}")
            sample_text = chk_train_df[TEXT_COL].iloc[0]
            print(f"\n### KFOLD k={k} — TOKENIZATION CHECK (fold {chk_fold}) ###")
            check_tokenization_for_all_models(sample_text, max_len=MAX_LEN)
        else:
            print(f"[KFOLD WARNING] Tidak bisa token check k={k}, fold {chk_fold} (file tidak ada)")

        # training semua fold untuk k itu
        for fold in range(1, num_folds + 1):
            train_path = os.path.join(k_dir, f"train_fold{fold}.csv")
            val_path   = os.path.join(k_dir, f"val_fold{fold}.csv")

            train_df = pd.read_csv(train_path)
            val_df   = pd.read_csv(val_path)

            if TEXT_COL not in train_df.columns or LABEL_COL not in train_df.columns:
                raise ValueError(f"Kolom {TEXT_COL}/{LABEL_COL} tidak ada di {train_path}")
            if TEXT_COL not in val_df.columns or LABEL_COL not in val_df.columns:
                raise ValueError(f"Kolom {TEXT_COL}/{LABEL_COL} tidak ada di {val_path}")

            for model_tag, hf_name in MODEL_ZOO.items():
                # simpan: /models/<ModelTag>/kfold/k{k}/fold{fold}/
                out_dir = os.path.join(MODELS_ROOT, model_tag, "kfold", f"k{k}", f"fold{fold}")
                print(f"\n===== TRAIN KFOLD k={k} fold={fold} | {model_tag} =====")
                try:
                    summary = train_one_run(model_tag, hf_name, train_df, val_df, out_dir)

                    ev = summary["eval_metrics"]
                    rows.append({
                        "split_type": "kfold",
                        "scheme": f"k{k}",
                        "fold": fold,
                        "model": model_tag,
                        "status": "OK",
                        "error": "",
                        "train_time_sec": summary.get("train_time_sec"),
                        "eval_loss": ev.get("eval_loss"),
                        "eval_accuracy": ev.get("eval_accuracy"),
                        "eval_precision_macro": ev.get("eval_precision_macro"),
                        "eval_recall_macro": ev.get("eval_recall_macro"),
                        "eval_f1_macro": ev.get("eval_f1_macro"),
                        "saved_to": summary.get("out_dir", out_dir),
                    })

                except Exception as e:
                    rows.append({
                        "split_type": "kfold",
                        "scheme": f"k{k}",
                        "fold": fold,
                        "model": model_tag,
                        "status": "FAILED",
                        "error": repr(e),
                        "train_time_sec": None,
                        "eval_loss": None,
                        "eval_accuracy": None,
                        "eval_precision_macro": None,
                        "eval_recall_macro": None,
                        "eval_f1_macro": None,
                        "saved_to": out_dir,
                    })
                    print(f"[ERROR] k={k} fold={fold} model={model_tag} -> {repr(e)}")

                pd.DataFrame(rows).to_csv(
                    os.path.join(RESULTS_ROOT, "results_kfold_progress.csv"),
                    index=False
                )
    out_csv = os.path.join(RESULTS_ROOT, "results_kfold_final.csv")
    pd.DataFrame(rows).to_csv(out_csv, index=False)
    print("\n[SUKSES] K-Fold selesai. Rekap:", out_csv)
    return pd.DataFrame(rows)

# Mini Test for Checking

In [None]:
def tokenization_sanity_check_once(sample_text: str, max_len: int = 128):
    """
    Cek tokenisasi 1x untuk 3 model (sekali sebelum semua training).
    """
    from transformers.utils import logging as hf_logging
    hf_logging.set_verbosity_error()   # sembunyikan warning/info dari transformers
    print("\n========== TOKENIZATION SANITY CHECK (ONCE) ==========")
    print("Sample text:", sample_text[:200], "..." if len(sample_text) > 200 else "")

    for tag, model_name in MODEL_ZOO.items():
        print(f"\n--- {tag} ({model_name}) ---")
        tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        enc = tok(
            sample_text,
            truncation=True,
            padding="max_length",
            max_length=max_len,
            return_tensors="pt"
        )
        print(" input_ids:", tuple(enc["input_ids"].shape))
        print(" attention_mask:", tuple(enc["attention_mask"].shape))

        # OPTIONAL: forward pass cepat (biar yakin input cocok)
        backbone = AutoModel.from_pretrained(model_name)
        backbone.eval()
        with torch.no_grad():
            out = backbone(**enc)
        print(" last_hidden_state:", tuple(out.last_hidden_state.shape))

    print("======================================================\n")

In [None]:
# ========= Ambil sample text dari salah satu split (holdout kalau ada) =========
sample_text = None

# coba ambil dari holdout 80_10_10
holdout_sample_path = os.path.join(SPLIT_HOLDOUT_ROOT, "train_80_10_10.csv")
if os.path.exists(holdout_sample_path):
    df_tmp = pd.read_csv(holdout_sample_path)
    sample_text = df_tmp[TEXT_COL].astype(str).iloc[0]
else:
    # kalau holdout ga ada, ambil dari kfold k2 fold1
    kfold_sample_path = os.path.join(SPLIT_KFOLD_ROOT, "k2", "train_fold1.csv")
    if os.path.exists(kfold_sample_path):
        df_tmp = pd.read_csv(kfold_sample_path)
        sample_text = df_tmp[TEXT_COL].astype(str).iloc[0]

if sample_text is None:
    raise FileNotFoundError("Tidak menemukan file sample untuk token check (holdout train_80_10_10 atau kfold k2/train_fold1).")

# ========= TOKEN CHECK SEKALI AJA =========
tokenization_sanity_check_once(sample_text, max_len=MAX_LEN)


Sample text: pagi udah di buat emosi 

--- IndoBERT (indobenchmark/indobert-base-p1) ---
 input_ids: (1, 128)
 attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

 last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
 input_ids: (1, 128)
 attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

 last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
 input_ids: (1, 128)
 attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

 last_hidden_state: (1, 128, 768)



# Run All

In [None]:
# ============================================================
# 13) RUN EVERYTHING
# ============================================================

# (A) Hold-out
holdout_df = run_holdout_training()

# (B) K-fold (k=2..10)
kfold_df = run_kfold_training(k_values=range(2, 11), token_check_fold=1)

# (C) Gabungkan rekap (opsional)
all_df = pd.concat([holdout_df, kfold_df], ignore_index=True)
all_path = os.path.join(RESULTS_ROOT, "results_all_final.csv")
all_df.to_csv(all_path, index=False)

print("\n[SUKSES] Semua selesai.")
print("Model tersimpan di:", MODELS_ROOT)
print("Rekap gabungan:", all_path)


===== HOLDOUT TRAINING: HO_80_10_10 =====
--- Training IndoBERT ---


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9824', 'grad_norm': '36.93', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6781', 'eval_accuracy': '0.7553', 'eval_precision_macro': '0.7542', 'eval_recall_macro': '0.7885', 'eval_f1_macro': '0.7631', 'eval_runtime': '0.8552', 'eval_samples_per_second': '822', 'eval_steps_per_second': '51.45', 'epoch': '1'}
{'loss': '0.657', 'grad_norm': '28.56', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6819', 'eval_accuracy': '0.7738', 'eval_precision_macro': '0.7743', 'eval_recall_macro': '0.7946', 'eval_f1_macro': '0.7808', 'eval_runtime': '0.8832', 'eval_samples_per_second': '796', 'eval_steps_per_second': '49.82', 'epoch': '2'}
{'loss': '0.558', 'grad_norm': '5.55', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8127', 'eval_accuracy': '0.7525', 'eval_precision_macro': '0.7535', 'eval_recall_macro': '0.7898', 'eval_f1_macro': '0.7598', 'eval_runtime': '0.8526', 'eval_samples_per_second': '824.5', 'eval_steps_per_second': '51.6', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.623', 'grad_norm': '81.73', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9648', 'eval_accuracy': '0.67', 'eval_precision_macro': '0.684', 'eval_recall_macro': '0.6719', 'eval_f1_macro': '0.6667', 'eval_runtime': '0.9235', 'eval_samples_per_second': '761.3', 'eval_steps_per_second': '47.65', 'epoch': '1'}
{'loss': '0.9606', 'grad_norm': '131.9', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7488', 'eval_accuracy': '0.7553', 'eval_precision_macro': '0.7599', 'eval_recall_macro': '0.7824', 'eval_f1_macro': '0.7661', 'eval_runtime': '0.8452', 'eval_samples_per_second': '831.8', 'eval_steps_per_second': '52.06', 'epoch': '2'}
{'loss': '0.7776', 'grad_norm': '39.16', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7562', 'eval_accuracy': '0.7639', 'eval_precision_macro': '0.7616', 'eval_recall_macro': '0.7995', 'eval_f1_macro': '0.7729', 'eval_runtime': '0.8969', 'eval_samples_per_second': '783.8', 'eval_steps_per_second': '49.06', 'epoch': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.3', 'grad_norm': '26.72', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8726', 'eval_accuracy': '0.6984', 'eval_precision_macro': '0.6991', 'eval_recall_macro': '0.7394', 'eval_f1_macro': '0.7062', 'eval_runtime': '0.8627', 'eval_samples_per_second': '814.8', 'eval_steps_per_second': '51', 'epoch': '1'}
{'loss': '0.9073', 'grad_norm': '39.7', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8064', 'eval_accuracy': '0.7354', 'eval_precision_macro': '0.7398', 'eval_recall_macro': '0.7546', 'eval_f1_macro': '0.7432', 'eval_runtime': '0.8262', 'eval_samples_per_second': '850.9', 'eval_steps_per_second': '53.26', 'epoch': '2'}
{'loss': '0.7938', 'grad_norm': '9.022', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.827', 'eval_accuracy': '0.7312', 'eval_precision_macro': '0.7281', 'eval_recall_macro': '0.7597', 'eval_f1_macro': '0.7365', 'eval_runtime': '0.8458', 'eval_samples_per_second': '831.1', 'eval_steps_per_second': '52.02', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.019', 'grad_norm': '21.31', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7679', 'eval_accuracy': '0.7543', 'eval_precision_macro': '0.7533', 'eval_recall_macro': '0.7875', 'eval_f1_macro': '0.7623', 'eval_runtime': '1.365', 'eval_samples_per_second': '772.4', 'eval_steps_per_second': '48.37', 'epoch': '1'}
{'loss': '0.6899', 'grad_norm': '26.79', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8253', 'eval_accuracy': '0.7419', 'eval_precision_macro': '0.7469', 'eval_recall_macro': '0.7856', 'eval_f1_macro': '0.7523', 'eval_runtime': '1.295', 'eval_samples_per_second': '814.1', 'eval_steps_per_second': '50.98', 'epoch': '2'}
{'loss': '0.533', 'grad_norm': '21.43', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8086', 'eval_accuracy': '0.7657', 'eval_precision_macro': '0.7684', 'eval_recall_macro': '0.7974', 'eval_f1_macro': '0.7742', 'eval_runtime': '1.399', 'eval_samples_per_second': '753.4', 'eval_steps_per_second': '47.17', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.64', 'grad_norm': '32.32', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.009', 'eval_accuracy': '0.6499', 'eval_precision_macro': '0.675', 'eval_recall_macro': '0.6752', 'eval_f1_macro': '0.6542', 'eval_runtime': '1.362', 'eval_samples_per_second': '774.1', 'eval_steps_per_second': '48.47', 'epoch': '1'}
{'loss': '0.9772', 'grad_norm': '25.86', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.841', 'eval_accuracy': '0.7353', 'eval_precision_macro': '0.7343', 'eval_recall_macro': '0.7783', 'eval_f1_macro': '0.7431', 'eval_runtime': '1.351', 'eval_samples_per_second': '780.3', 'eval_steps_per_second': '48.86', 'epoch': '2'}
{'loss': '0.7814', 'grad_norm': '49.9', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7704', 'eval_accuracy': '0.7514', 'eval_precision_macro': '0.7516', 'eval_recall_macro': '0.7879', 'eval_f1_macro': '0.7619', 'eval_runtime': '1.498', 'eval_samples_per_second': '703.4', 'eval_steps_per_second': '44.05', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.348', 'grad_norm': '30.03', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.011', 'eval_accuracy': '0.6727', 'eval_precision_macro': '0.6823', 'eval_recall_macro': '0.7002', 'eval_f1_macro': '0.678', 'eval_runtime': '1.332', 'eval_samples_per_second': '791.6', 'eval_steps_per_second': '49.57', 'epoch': '1'}
{'loss': '0.9262', 'grad_norm': '26.2', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.962', 'eval_accuracy': '0.7002', 'eval_precision_macro': '0.6997', 'eval_recall_macro': '0.7407', 'eval_f1_macro': '0.7055', 'eval_runtime': '1.325', 'eval_samples_per_second': '795.6', 'eval_steps_per_second': '49.82', 'epoch': '2'}
{'loss': '0.7795', 'grad_norm': '29.18', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9419', 'eval_accuracy': '0.6898', 'eval_precision_macro': '0.6961', 'eval_recall_macro': '0.7241', 'eval_f1_macro': '0.6964', 'eval_runtime': '1.322', 'eval_samples_per_second': '797.4', 'eval_steps_per_second': '49.94', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9955', 'grad_norm': '18.3', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7542', 'eval_accuracy': '0.7457', 'eval_precision_macro': '0.7491', 'eval_recall_macro': '0.7777', 'eval_f1_macro': '0.7538', 'eval_runtime': '1.441', 'eval_samples_per_second': '731.5', 'eval_steps_per_second': '45.8', 'epoch': '1'}
{'loss': '0.6744', 'grad_norm': '30.16', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.819', 'eval_accuracy': '0.74', 'eval_precision_macro': '0.743', 'eval_recall_macro': '0.78', 'eval_f1_macro': '0.7485', 'eval_runtime': '1.29', 'eval_samples_per_second': '816.9', 'eval_steps_per_second': '51.15', 'epoch': '2'}
{'loss': '0.5407', 'grad_norm': '23.93', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7991', 'eval_accuracy': '0.7571', 'eval_precision_macro': '0.7652', 'eval_recall_macro': '0.7853', 'eval_f1_macro': '0.7672', 'eval_runtime': '1.331', 'eval_samples_per_second': '791.8', 'eval_steps_per_second': '49.58', 'epoch': '3'}
{'lo

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.64', 'grad_norm': '32.32', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.009', 'eval_accuracy': '0.6499', 'eval_precision_macro': '0.675', 'eval_recall_macro': '0.6752', 'eval_f1_macro': '0.6542', 'eval_runtime': '1.377', 'eval_samples_per_second': '765.5', 'eval_steps_per_second': '47.94', 'epoch': '1'}
{'loss': '0.9772', 'grad_norm': '25.86', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.841', 'eval_accuracy': '0.7353', 'eval_precision_macro': '0.7343', 'eval_recall_macro': '0.7783', 'eval_f1_macro': '0.7431', 'eval_runtime': '1.281', 'eval_samples_per_second': '823', 'eval_steps_per_second': '51.54', 'epoch': '2'}
{'loss': '0.7814', 'grad_norm': '49.9', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7704', 'eval_accuracy': '0.7514', 'eval_precision_macro': '0.7516', 'eval_recall_macro': '0.7879', 'eval_f1_macro': '0.7619', 'eval_runtime': '1.462', 'eval_samples_per_second': '720.8', 'eval_steps_per_second': '45.13', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.348', 'grad_norm': '30.03', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.011', 'eval_accuracy': '0.6727', 'eval_precision_macro': '0.6823', 'eval_recall_macro': '0.7002', 'eval_f1_macro': '0.678', 'eval_runtime': '1.282', 'eval_samples_per_second': '822.3', 'eval_steps_per_second': '51.49', 'epoch': '1'}
{'loss': '0.9262', 'grad_norm': '26.2', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.962', 'eval_accuracy': '0.7002', 'eval_precision_macro': '0.6997', 'eval_recall_macro': '0.7407', 'eval_f1_macro': '0.7055', 'eval_runtime': '1.238', 'eval_samples_per_second': '851.1', 'eval_steps_per_second': '53.3', 'epoch': '2'}
{'loss': '0.7795', 'grad_norm': '29.18', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9419', 'eval_accuracy': '0.6898', 'eval_precision_macro': '0.6961', 'eval_recall_macro': '0.7241', 'eval_f1_macro': '0.6964', 'eval_runtime': '1.356', 'eval_samples_per_second': '777.3', 'eval_steps_per_second': '48.67', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)


===== TRAIN KFOLD k=2 fold=1 | IndoBERT =====


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.078', 'grad_norm': '21.21', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8936', 'eval_accuracy': '0.7083', 'eval_precision_macro': '0.7142', 'eval_recall_macro': '0.7521', 'eval_f1_macro': '0.7174', 'eval_runtime': '4.283', 'eval_samples_per_second': '820.4', 'eval_steps_per_second': '51.37', 'epoch': '1'}
{'loss': '0.6883', 'grad_norm': '18.77', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8437', 'eval_accuracy': '0.7314', 'eval_precision_macro': '0.731', 'eval_recall_macro': '0.7641', 'eval_f1_macro': '0.739', 'eval_runtime': '4.324', 'eval_samples_per_second': '812.7', 'eval_steps_per_second': '50.88', 'epoch': '2'}
{'loss': '0.5363', 'grad_norm': '37.33', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8311', 'eval_accuracy': '0.7456', 'eval_precision_macro': '0.7497', 'eval_recall_macro': '0.7633', 'eval_f1_macro': '0.7529', 'eval_runtime': '4.255', 'eval_samples_per_second': '825.7', 'eval_steps_per_second': '51.7', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.669', 'grad_norm': '41.32', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.14', 'eval_accuracy': '0.6209', 'eval_precision_macro': '0.6291', 'eval_recall_macro': '0.6196', 'eval_f1_macro': '0.6177', 'eval_runtime': '4.374', 'eval_samples_per_second': '803.4', 'eval_steps_per_second': '50.3', 'epoch': '1'}
{'loss': '1.079', 'grad_norm': '33.77', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8291', 'eval_accuracy': '0.7325', 'eval_precision_macro': '0.73', 'eval_recall_macro': '0.7641', 'eval_f1_macro': '0.7401', 'eval_runtime': '4.439', 'eval_samples_per_second': '791.5', 'eval_steps_per_second': '49.55', 'epoch': '2'}
{'loss': '0.8464', 'grad_norm': '44.92', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7875', 'eval_accuracy': '0.7518', 'eval_precision_macro': '0.753', 'eval_recall_macro': '0.7756', 'eval_f1_macro': '0.7593', 'eval_runtime': '4.211', 'eval_samples_per_second': '834.5', 'eval_steps_per_second': '52.25', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.445', 'grad_norm': '26.73', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.156', 'eval_accuracy': '0.6323', 'eval_precision_macro': '0.6368', 'eval_recall_macro': '0.6615', 'eval_f1_macro': '0.6399', 'eval_runtime': '4.093', 'eval_samples_per_second': '858.6', 'eval_steps_per_second': '53.75', 'epoch': '1'}
{'loss': '0.9724', 'grad_norm': '14.87', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '1.061', 'eval_accuracy': '0.6801', 'eval_precision_macro': '0.6834', 'eval_recall_macro': '0.7151', 'eval_f1_macro': '0.6884', 'eval_runtime': '4.305', 'eval_samples_per_second': '816.3', 'eval_steps_per_second': '51.1', 'epoch': '2'}
{'loss': '0.8176', 'grad_norm': '47.37', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '1.057', 'eval_accuracy': '0.6799', 'eval_precision_macro': '0.6974', 'eval_recall_macro': '0.6991', 'eval_f1_macro': '0.689', 'eval_runtime': '4.484', 'eval_samples_per_second': '783.6', 'eval_steps_per_second': '49.06', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.102', 'grad_norm': '14.97', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9563', 'eval_accuracy': '0.6949', 'eval_precision_macro': '0.7106', 'eval_recall_macro': '0.7347', 'eval_f1_macro': '0.7038', 'eval_runtime': '4.473', 'eval_samples_per_second': '785.5', 'eval_steps_per_second': '49.18', 'epoch': '1'}
{'loss': '0.7191', 'grad_norm': '16.32', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.817', 'eval_accuracy': '0.7339', 'eval_precision_macro': '0.7424', 'eval_recall_macro': '0.772', 'eval_f1_macro': '0.7434', 'eval_runtime': '4.297', 'eval_samples_per_second': '817.7', 'eval_steps_per_second': '51.2', 'epoch': '2'}
{'loss': '0.586', 'grad_norm': '20.31', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8463', 'eval_accuracy': '0.7353', 'eval_precision_macro': '0.7327', 'eval_recall_macro': '0.7692', 'eval_f1_macro': '0.7427', 'eval_runtime': '4.703', 'eval_samples_per_second': '747.2', 'eval_steps_per_second': '46.78', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.699', 'grad_norm': '34.34', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.263', 'eval_accuracy': '0.547', 'eval_precision_macro': '0.5881', 'eval_recall_macro': '0.5424', 'eval_f1_macro': '0.5303', 'eval_runtime': '4.294', 'eval_samples_per_second': '818.3', 'eval_steps_per_second': '51.23', 'epoch': '1'}
{'loss': '1.157', 'grad_norm': '35', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8129', 'eval_accuracy': '0.7217', 'eval_precision_macro': '0.7213', 'eval_recall_macro': '0.7523', 'eval_f1_macro': '0.7291', 'eval_runtime': '4.431', 'eval_samples_per_second': '793', 'eval_steps_per_second': '49.65', 'epoch': '2'}
{'loss': '0.8738', 'grad_norm': '29.53', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7536', 'eval_accuracy': '0.743', 'eval_precision_macro': '0.7403', 'eval_recall_macro': '0.7738', 'eval_f1_macro': '0.752', 'eval_runtime': '4.254', 'eval_samples_per_second': '826.1', 'eval_steps_per_second': '51.72', 'epoch': '3'}
{'loss

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.432', 'grad_norm': '20.07', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.053', 'eval_accuracy': '0.6389', 'eval_precision_macro': '0.6443', 'eval_recall_macro': '0.6685', 'eval_f1_macro': '0.6418', 'eval_runtime': '4.093', 'eval_samples_per_second': '858.4', 'eval_steps_per_second': '53.74', 'epoch': '1'}
{'loss': '0.9808', 'grad_norm': '20.45', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '1.001', 'eval_accuracy': '0.6773', 'eval_precision_macro': '0.6814', 'eval_recall_macro': '0.7219', 'eval_f1_macro': '0.6849', 'eval_runtime': '4.408', 'eval_samples_per_second': '797.3', 'eval_steps_per_second': '49.91', 'epoch': '2'}
{'loss': '0.8455', 'grad_norm': '15.56', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '1.03', 'eval_accuracy': '0.6719', 'eval_precision_macro': '0.6855', 'eval_recall_macro': '0.7012', 'eval_f1_macro': '0.6762', 'eval_runtime': '4.349', 'eval_samples_per_second': '807.9', 'eval_steps_per_second': '50.58', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)


===== TRAIN KFOLD k=3 fold=1 | IndoBERT =====


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.057', 'grad_norm': '9.203', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7335', 'eval_accuracy': '0.7533', 'eval_precision_macro': '0.7536', 'eval_recall_macro': '0.7825', 'eval_f1_macro': '0.7631', 'eval_runtime': '3.128', 'eval_samples_per_second': '749.1', 'eval_steps_per_second': '47', 'epoch': '1'}
{'loss': '0.6918', 'grad_norm': '23.8', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7362', 'eval_accuracy': '0.7623', 'eval_precision_macro': '0.7605', 'eval_recall_macro': '0.7891', 'eval_f1_macro': '0.7687', 'eval_runtime': '2.829', 'eval_samples_per_second': '828.1', 'eval_steps_per_second': '51.95', 'epoch': '2'}
{'loss': '0.554', 'grad_norm': '18.57', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7649', 'eval_accuracy': '0.7601', 'eval_precision_macro': '0.7617', 'eval_recall_macro': '0.778', 'eval_f1_macro': '0.7686', 'eval_runtime': '3.039', 'eval_samples_per_second': '771', 'eval_steps_per_second': '48.37', 'epoch': '3'}
{'lo

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.606', 'grad_norm': '28.45', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.977', 'eval_accuracy': '0.6581', 'eval_precision_macro': '0.6625', 'eval_recall_macro': '0.6748', 'eval_f1_macro': '0.6594', 'eval_runtime': '2.878', 'eval_samples_per_second': '814.1', 'eval_steps_per_second': '51.08', 'epoch': '1'}
{'loss': '0.9821', 'grad_norm': '48.18', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7588', 'eval_accuracy': '0.7414', 'eval_precision_macro': '0.7382', 'eval_recall_macro': '0.7739', 'eval_f1_macro': '0.7488', 'eval_runtime': '2.88', 'eval_samples_per_second': '813.6', 'eval_steps_per_second': '51.05', 'epoch': '2'}
{'loss': '0.807', 'grad_norm': '15.39', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6811', 'eval_accuracy': '0.7712', 'eval_precision_macro': '0.7704', 'eval_recall_macro': '0.7944', 'eval_f1_macro': '0.7797', 'eval_runtime': '2.928', 'eval_samples_per_second': '800.3', 'eval_steps_per_second': '50.21', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.366', 'grad_norm': '13.94', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9584', 'eval_accuracy': '0.6876', 'eval_precision_macro': '0.6839', 'eval_recall_macro': '0.7186', 'eval_f1_macro': '0.6953', 'eval_runtime': '2.975', 'eval_samples_per_second': '787.5', 'eval_steps_per_second': '49.41', 'epoch': '1'}
{'loss': '0.9261', 'grad_norm': '26.33', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9093', 'eval_accuracy': '0.7089', 'eval_precision_macro': '0.7067', 'eval_recall_macro': '0.7402', 'eval_f1_macro': '0.7164', 'eval_runtime': '2.771', 'eval_samples_per_second': '845.7', 'eval_steps_per_second': '53.06', 'epoch': '2'}
{'loss': '0.7871', 'grad_norm': '16.51', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.895', 'eval_accuracy': '0.7068', 'eval_precision_macro': '0.7055', 'eval_recall_macro': '0.7365', 'eval_f1_macro': '0.7156', 'eval_runtime': '2.766', 'eval_samples_per_second': '847.1', 'eval_steps_per_second': '53.15', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.027', 'grad_norm': '22.44', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7577', 'eval_accuracy': '0.7409', 'eval_precision_macro': '0.7375', 'eval_recall_macro': '0.7711', 'eval_f1_macro': '0.7481', 'eval_runtime': '2.955', 'eval_samples_per_second': '792.8', 'eval_steps_per_second': '49.74', 'epoch': '1'}
{'loss': '0.7052', 'grad_norm': '19.87', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7327', 'eval_accuracy': '0.7397', 'eval_precision_macro': '0.7391', 'eval_recall_macro': '0.7723', 'eval_f1_macro': '0.7466', 'eval_runtime': '2.867', 'eval_samples_per_second': '817.2', 'eval_steps_per_second': '51.27', 'epoch': '2'}
{'loss': '0.5457', 'grad_norm': '21.4', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7652', 'eval_accuracy': '0.7533', 'eval_precision_macro': '0.7623', 'eval_recall_macro': '0.7667', 'eval_f1_macro': '0.7606', 'eval_runtime': '2.932', 'eval_samples_per_second': '799.1', 'eval_steps_per_second': '50.13', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.546', 'grad_norm': '94.74', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.035', 'eval_accuracy': '0.644', 'eval_precision_macro': '0.6506', 'eval_recall_macro': '0.6801', 'eval_f1_macro': '0.6493', 'eval_runtime': '3.029', 'eval_samples_per_second': '773.4', 'eval_steps_per_second': '48.52', 'epoch': '1'}
{'loss': '0.9746', 'grad_norm': '25.54', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7746', 'eval_accuracy': '0.7388', 'eval_precision_macro': '0.7342', 'eval_recall_macro': '0.7699', 'eval_f1_macro': '0.7459', 'eval_runtime': '2.938', 'eval_samples_per_second': '797.4', 'eval_steps_per_second': '50.03', 'epoch': '2'}
{'loss': '0.7871', 'grad_norm': '31.38', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7069', 'eval_accuracy': '0.7618', 'eval_precision_macro': '0.7643', 'eval_recall_macro': '0.7841', 'eval_f1_macro': '0.7692', 'eval_runtime': '3.036', 'eval_samples_per_second': '771.8', 'eval_steps_per_second': '48.42', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.412', 'grad_norm': '21.04', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.003', 'eval_accuracy': '0.6607', 'eval_precision_macro': '0.67', 'eval_recall_macro': '0.6786', 'eval_f1_macro': '0.6685', 'eval_runtime': '2.841', 'eval_samples_per_second': '824.8', 'eval_steps_per_second': '51.75', 'epoch': '1'}
{'loss': '0.9516', 'grad_norm': '19.39', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9424', 'eval_accuracy': '0.688', 'eval_precision_macro': '0.6867', 'eval_recall_macro': '0.7239', 'eval_f1_macro': '0.696', 'eval_runtime': '3.072', 'eval_samples_per_second': '762.7', 'eval_steps_per_second': '47.85', 'epoch': '2'}
{'loss': '0.8005', 'grad_norm': '18.23', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8752', 'eval_accuracy': '0.6987', 'eval_precision_macro': '0.706', 'eval_recall_macro': '0.7161', 'eval_f1_macro': '0.7067', 'eval_runtime': '2.758', 'eval_samples_per_second': '849.6', 'eval_steps_per_second': '53.31', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.015', 'grad_norm': '11.46', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.764', 'eval_accuracy': '0.7412', 'eval_precision_macro': '0.7429', 'eval_recall_macro': '0.7645', 'eval_f1_macro': '0.7487', 'eval_runtime': '2.856', 'eval_samples_per_second': '820.1', 'eval_steps_per_second': '51.48', 'epoch': '1'}
{'loss': '0.6797', 'grad_norm': '18.68', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7609', 'eval_accuracy': '0.7511', 'eval_precision_macro': '0.7534', 'eval_recall_macro': '0.7782', 'eval_f1_macro': '0.7591', 'eval_runtime': '2.859', 'eval_samples_per_second': '819.1', 'eval_steps_per_second': '51.41', 'epoch': '2'}
{'loss': '0.5349', 'grad_norm': '16.01', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.872', 'eval_accuracy': '0.731', 'eval_precision_macro': '0.7507', 'eval_recall_macro': '0.7318', 'eval_f1_macro': '0.7365', 'eval_runtime': '2.839', 'eval_samples_per_second': '824.9', 'eval_steps_per_second': '51.78', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.729', 'grad_norm': '67.04', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.429', 'eval_accuracy': '0.45', 'eval_precision_macro': '0.5054', 'eval_recall_macro': '0.4567', 'eval_f1_macro': '0.4101', 'eval_runtime': '2.959', 'eval_samples_per_second': '791.4', 'eval_steps_per_second': '49.67', 'epoch': '1'}
{'loss': '1.222', 'grad_norm': '49.32', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.833', 'eval_accuracy': '0.7135', 'eval_precision_macro': '0.7182', 'eval_recall_macro': '0.7451', 'eval_f1_macro': '0.7224', 'eval_runtime': '2.878', 'eval_samples_per_second': '813.8', 'eval_steps_per_second': '51.08', 'epoch': '2'}
{'loss': '0.8415', 'grad_norm': '17.68', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7592', 'eval_accuracy': '0.7447', 'eval_precision_macro': '0.7418', 'eval_recall_macro': '0.7727', 'eval_f1_macro': '0.7532', 'eval_runtime': '3.046', 'eval_samples_per_second': '768.9', 'eval_steps_per_second': '48.26', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.326', 'grad_norm': '14.41', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.046', 'eval_accuracy': '0.6674', 'eval_precision_macro': '0.6668', 'eval_recall_macro': '0.7016', 'eval_f1_macro': '0.6734', 'eval_runtime': '2.883', 'eval_samples_per_second': '812.4', 'eval_steps_per_second': '50.99', 'epoch': '1'}
{'loss': '0.9353', 'grad_norm': '17.97', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9461', 'eval_accuracy': '0.6951', 'eval_precision_macro': '0.6991', 'eval_recall_macro': '0.7302', 'eval_f1_macro': '0.7027', 'eval_runtime': '3.025', 'eval_samples_per_second': '774.3', 'eval_steps_per_second': '48.6', 'epoch': '2'}
{'loss': '0.7814', 'grad_norm': '13.51', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8822', 'eval_accuracy': '0.7105', 'eval_precision_macro': '0.7125', 'eval_recall_macro': '0.7187', 'eval_f1_macro': '0.7152', 'eval_runtime': '2.841', 'eval_samples_per_second': '824.4', 'eval_steps_per_second': '51.75', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)


===== TRAIN KFOLD k=4 fold=1 | IndoBERT =====


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9991', 'grad_norm': '8.045', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7629', 'eval_accuracy': '0.7473', 'eval_precision_macro': '0.7506', 'eval_recall_macro': '0.7666', 'eval_f1_macro': '0.7519', 'eval_runtime': '2.151', 'eval_samples_per_second': '816.7', 'eval_steps_per_second': '51.13', 'epoch': '1'}
{'loss': '0.6621', 'grad_norm': '19.87', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7605', 'eval_accuracy': '0.7518', 'eval_precision_macro': '0.7533', 'eval_recall_macro': '0.7701', 'eval_f1_macro': '0.7591', 'eval_runtime': '2.149', 'eval_samples_per_second': '817.4', 'eval_steps_per_second': '51.18', 'epoch': '2'}
{'loss': '0.5345', 'grad_norm': '32.68', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8626', 'eval_accuracy': '0.7479', 'eval_precision_macro': '0.7477', 'eval_recall_macro': '0.7714', 'eval_f1_macro': '0.7558', 'eval_runtime': '2.283', 'eval_samples_per_second': '769.6', 'eval_steps_per_second': '48.18', 'epoch': 

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.511', 'grad_norm': '28', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8467', 'eval_accuracy': '0.7046', 'eval_precision_macro': '0.7155', 'eval_recall_macro': '0.7156', 'eval_f1_macro': '0.7093', 'eval_runtime': '2.459', 'eval_samples_per_second': '714.4', 'eval_steps_per_second': '44.73', 'epoch': '1'}
{'loss': '0.9188', 'grad_norm': '41.79', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7098', 'eval_accuracy': '0.7649', 'eval_precision_macro': '0.7689', 'eval_recall_macro': '0.7807', 'eval_f1_macro': '0.7714', 'eval_runtime': '2.132', 'eval_samples_per_second': '824.2', 'eval_steps_per_second': '51.6', 'epoch': '2'}
{'loss': '0.7616', 'grad_norm': '48.28', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7331', 'eval_accuracy': '0.7558', 'eval_precision_macro': '0.7536', 'eval_recall_macro': '0.7915', 'eval_f1_macro': '0.7656', 'eval_runtime': '2.13', 'eval_samples_per_second': '825', 'eval_steps_per_second': '51.65', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.407', 'grad_norm': '14.54', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9993', 'eval_accuracy': '0.6699', 'eval_precision_macro': '0.6786', 'eval_recall_macro': '0.6941', 'eval_f1_macro': '0.6787', 'eval_runtime': '2.168', 'eval_samples_per_second': '810.4', 'eval_steps_per_second': '50.73', 'epoch': '1'}
{'loss': '0.9389', 'grad_norm': '31.89', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9084', 'eval_accuracy': '0.6978', 'eval_precision_macro': '0.7087', 'eval_recall_macro': '0.7108', 'eval_f1_macro': '0.7045', 'eval_runtime': '2.083', 'eval_samples_per_second': '843.3', 'eval_steps_per_second': '52.8', 'epoch': '2'}
{'loss': '0.7839', 'grad_norm': '29.24', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9764', 'eval_accuracy': '0.7103', 'eval_precision_macro': '0.7072', 'eval_recall_macro': '0.7468', 'eval_f1_macro': '0.7178', 'eval_runtime': '2.065', 'eval_samples_per_second': '850.8', 'eval_steps_per_second': '53.27', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.996', 'grad_norm': '13.17', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7233', 'eval_accuracy': '0.7598', 'eval_precision_macro': '0.7579', 'eval_recall_macro': '0.7822', 'eval_f1_macro': '0.7642', 'eval_runtime': '2.482', 'eval_samples_per_second': '707.8', 'eval_steps_per_second': '44.32', 'epoch': '1'}
{'loss': '0.6617', 'grad_norm': '25.6', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.725', 'eval_accuracy': '0.7581', 'eval_precision_macro': '0.7652', 'eval_recall_macro': '0.7704', 'eval_f1_macro': '0.7634', 'eval_runtime': '2.181', 'eval_samples_per_second': '805.6', 'eval_steps_per_second': '50.43', 'epoch': '2'}
{'loss': '0.5313', 'grad_norm': '12.99', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8999', 'eval_accuracy': '0.7536', 'eval_precision_macro': '0.7529', 'eval_recall_macro': '0.7804', 'eval_f1_macro': '0.7588', 'eval_runtime': '2.279', 'eval_samples_per_second': '770.8', 'eval_steps_per_second': '48.26', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.589', 'grad_norm': '56.48', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9561', 'eval_accuracy': '0.6693', 'eval_precision_macro': '0.6747', 'eval_recall_macro': '0.6741', 'eval_f1_macro': '0.6659', 'eval_runtime': '2.251', 'eval_samples_per_second': '780.5', 'eval_steps_per_second': '48.86', 'epoch': '1'}
{'loss': '0.9702', 'grad_norm': '44.86', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7336', 'eval_accuracy': '0.753', 'eval_precision_macro': '0.751', 'eval_recall_macro': '0.7725', 'eval_f1_macro': '0.7588', 'eval_runtime': '2.143', 'eval_samples_per_second': '820', 'eval_steps_per_second': '51.34', 'epoch': '2'}
{'loss': '0.8012', 'grad_norm': '29.59', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7729', 'eval_accuracy': '0.7501', 'eval_precision_macro': '0.7484', 'eval_recall_macro': '0.7821', 'eval_f1_macro': '0.7575', 'eval_runtime': '2.155', 'eval_samples_per_second': '815.1', 'eval_steps_per_second': '51.03', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.322', 'grad_norm': '13.78', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.03', 'eval_accuracy': '0.6784', 'eval_precision_macro': '0.6928', 'eval_recall_macro': '0.696', 'eval_f1_macro': '0.686', 'eval_runtime': '2.066', 'eval_samples_per_second': '850.4', 'eval_steps_per_second': '53.24', 'epoch': '1'}
{'loss': '0.9078', 'grad_norm': '27.7', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9157', 'eval_accuracy': '0.6932', 'eval_precision_macro': '0.6971', 'eval_recall_macro': '0.7168', 'eval_f1_macro': '0.7', 'eval_runtime': '2.07', 'eval_samples_per_second': '848.7', 'eval_steps_per_second': '53.14', 'epoch': '2'}
{'loss': '0.7618', 'grad_norm': '18.12', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '1.05', 'eval_accuracy': '0.6864', 'eval_precision_macro': '0.6899', 'eval_recall_macro': '0.7289', 'eval_f1_macro': '0.6931', 'eval_runtime': '2.204', 'eval_samples_per_second': '797', 'eval_steps_per_second': '49.9', 'epoch': '3'}
{'loss': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.014', 'grad_norm': '21.44', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7013', 'eval_accuracy': '0.7479', 'eval_precision_macro': '0.7513', 'eval_recall_macro': '0.7665', 'eval_f1_macro': '0.7542', 'eval_runtime': '2.179', 'eval_samples_per_second': '806.3', 'eval_steps_per_second': '50.48', 'epoch': '1'}
{'loss': '0.689', 'grad_norm': '30.01', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6924', 'eval_accuracy': '0.7632', 'eval_precision_macro': '0.7742', 'eval_recall_macro': '0.7793', 'eval_f1_macro': '0.7726', 'eval_runtime': '2.165', 'eval_samples_per_second': '811.6', 'eval_steps_per_second': '50.81', 'epoch': '2'}
{'loss': '0.558', 'grad_norm': '13.89', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7225', 'eval_accuracy': '0.7701', 'eval_precision_macro': '0.771', 'eval_recall_macro': '0.7949', 'eval_f1_macro': '0.7785', 'eval_runtime': '2.137', 'eval_samples_per_second': '822.2', 'eval_steps_per_second': '51.47', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.584', 'grad_norm': '46.65', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9018', 'eval_accuracy': '0.7052', 'eval_precision_macro': '0.7114', 'eval_recall_macro': '0.7129', 'eval_f1_macro': '0.7092', 'eval_runtime': '2.205', 'eval_samples_per_second': '796.7', 'eval_steps_per_second': '49.88', 'epoch': '1'}
{'loss': '0.966', 'grad_norm': '52.59', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6998', 'eval_accuracy': '0.7513', 'eval_precision_macro': '0.762', 'eval_recall_macro': '0.7628', 'eval_f1_macro': '0.7597', 'eval_runtime': '2.163', 'eval_samples_per_second': '812.4', 'eval_steps_per_second': '50.86', 'epoch': '2'}
{'loss': '0.7964', 'grad_norm': '29.5', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7309', 'eval_accuracy': '0.7501', 'eval_precision_macro': '0.7511', 'eval_recall_macro': '0.7861', 'eval_f1_macro': '0.7605', 'eval_runtime': '2.143', 'eval_samples_per_second': '819.8', 'eval_steps_per_second': '51.32', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.317', 'grad_norm': '17.18', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9494', 'eval_accuracy': '0.7001', 'eval_precision_macro': '0.709', 'eval_recall_macro': '0.7124', 'eval_f1_macro': '0.702', 'eval_runtime': '2.385', 'eval_samples_per_second': '736.6', 'eval_steps_per_second': '46.12', 'epoch': '1'}
{'loss': '0.9534', 'grad_norm': '30.5', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8729', 'eval_accuracy': '0.724', 'eval_precision_macro': '0.727', 'eval_recall_macro': '0.7392', 'eval_f1_macro': '0.7318', 'eval_runtime': '2.074', 'eval_samples_per_second': '847.4', 'eval_steps_per_second': '53.05', 'epoch': '2'}
{'loss': '0.8042', 'grad_norm': '24.46', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.886', 'eval_accuracy': '0.7097', 'eval_precision_macro': '0.7074', 'eval_recall_macro': '0.7482', 'eval_f1_macro': '0.7162', 'eval_runtime': '2.064', 'eval_samples_per_second': '851.2', 'eval_steps_per_second': '53.29', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.004', 'grad_norm': '6.653', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7771', 'eval_accuracy': '0.7473', 'eval_precision_macro': '0.7514', 'eval_recall_macro': '0.7657', 'eval_f1_macro': '0.7545', 'eval_runtime': '2.72', 'eval_samples_per_second': '645.9', 'eval_steps_per_second': '40.44', 'epoch': '1'}
{'loss': '0.6761', 'grad_norm': '39.86', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8258', 'eval_accuracy': '0.7319', 'eval_precision_macro': '0.7359', 'eval_recall_macro': '0.7582', 'eval_f1_macro': '0.7386', 'eval_runtime': '2.121', 'eval_samples_per_second': '828.5', 'eval_steps_per_second': '51.87', 'epoch': '2'}
{'loss': '0.5358', 'grad_norm': '25.06', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8804', 'eval_accuracy': '0.7348', 'eval_precision_macro': '0.7346', 'eval_recall_macro': '0.7632', 'eval_f1_macro': '0.7421', 'eval_runtime': '2.156', 'eval_samples_per_second': '814.9', 'eval_steps_per_second': '51.02', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.603', 'grad_norm': '60.77', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9912', 'eval_accuracy': '0.654', 'eval_precision_macro': '0.6556', 'eval_recall_macro': '0.6618', 'eval_f1_macro': '0.6572', 'eval_runtime': '2.455', 'eval_samples_per_second': '715.6', 'eval_steps_per_second': '44.8', 'epoch': '1'}
{'loss': '1.005', 'grad_norm': '43.34', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7618', 'eval_accuracy': '0.7319', 'eval_precision_macro': '0.7321', 'eval_recall_macro': '0.7537', 'eval_f1_macro': '0.7405', 'eval_runtime': '2.125', 'eval_samples_per_second': '826.8', 'eval_steps_per_second': '51.76', 'epoch': '2'}
{'loss': '0.8038', 'grad_norm': '61.07', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7573', 'eval_accuracy': '0.7536', 'eval_precision_macro': '0.7539', 'eval_recall_macro': '0.7785', 'eval_f1_macro': '0.7633', 'eval_runtime': '2.133', 'eval_samples_per_second': '823.6', 'eval_steps_per_second': '51.56', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.351', 'grad_norm': '28.43', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9754', 'eval_accuracy': '0.6841', 'eval_precision_macro': '0.6845', 'eval_recall_macro': '0.715', 'eval_f1_macro': '0.6916', 'eval_runtime': '2.08', 'eval_samples_per_second': '844.8', 'eval_steps_per_second': '52.89', 'epoch': '1'}
{'loss': '0.9187', 'grad_norm': '27.31', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9175', 'eval_accuracy': '0.7001', 'eval_precision_macro': '0.6989', 'eval_recall_macro': '0.72', 'eval_f1_macro': '0.705', 'eval_runtime': '2.059', 'eval_samples_per_second': '853.5', 'eval_steps_per_second': '53.44', 'epoch': '2'}
{'loss': '0.7675', 'grad_norm': '14.76', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.959', 'eval_accuracy': '0.6989', 'eval_precision_macro': '0.7019', 'eval_recall_macro': '0.7288', 'eval_f1_macro': '0.7054', 'eval_runtime': '2.062', 'eval_samples_per_second': '852.3', 'eval_steps_per_second': '53.36', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)


===== TRAIN KFOLD k=5 fold=1 | IndoBERT =====


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9891', 'grad_norm': '38.6', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8673', 'eval_accuracy': '0.7262', 'eval_precision_macro': '0.739', 'eval_recall_macro': '0.7471', 'eval_f1_macro': '0.7364', 'eval_runtime': '1.685', 'eval_samples_per_second': '834.6', 'eval_steps_per_second': '52.24', 'epoch': '1'}
{'loss': '0.6572', 'grad_norm': '40.3', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8516', 'eval_accuracy': '0.7397', 'eval_precision_macro': '0.737', 'eval_recall_macro': '0.7695', 'eval_f1_macro': '0.7472', 'eval_runtime': '1.722', 'eval_samples_per_second': '816.6', 'eval_steps_per_second': '51.11', 'epoch': '2'}
{'loss': '0.5259', 'grad_norm': '23.31', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7971', 'eval_accuracy': '0.7518', 'eval_precision_macro': '0.7594', 'eval_recall_macro': '0.7633', 'eval_f1_macro': '0.761', 'eval_runtime': '1.898', 'eval_samples_per_second': '740.7', 'eval_steps_per_second': '46.36', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.475', 'grad_norm': '31.43', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8407', 'eval_accuracy': '0.7134', 'eval_precision_macro': '0.7212', 'eval_recall_macro': '0.7304', 'eval_f1_macro': '0.7233', 'eval_runtime': '1.793', 'eval_samples_per_second': '784.3', 'eval_steps_per_second': '49.09', 'epoch': '1'}
{'loss': '0.8869', 'grad_norm': '32.48', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8354', 'eval_accuracy': '0.7297', 'eval_precision_macro': '0.7295', 'eval_recall_macro': '0.7706', 'eval_f1_macro': '0.7396', 'eval_runtime': '1.713', 'eval_samples_per_second': '820.7', 'eval_steps_per_second': '51.37', 'epoch': '2'}
{'loss': '0.7486', 'grad_norm': '17.56', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7337', 'eval_accuracy': '0.7646', 'eval_precision_macro': '0.7651', 'eval_recall_macro': '0.7845', 'eval_f1_macro': '0.7731', 'eval_runtime': '1.703', 'eval_samples_per_second': '825.5', 'eval_steps_per_second': '51.66', 'epoch': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.302', 'grad_norm': '28', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.069', 'eval_accuracy': '0.6522', 'eval_precision_macro': '0.6517', 'eval_recall_macro': '0.6836', 'eval_f1_macro': '0.6579', 'eval_runtime': '1.669', 'eval_samples_per_second': '842.3', 'eval_steps_per_second': '52.72', 'epoch': '1'}
{'loss': '0.9103', 'grad_norm': '23.53', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9609', 'eval_accuracy': '0.6977', 'eval_precision_macro': '0.6962', 'eval_recall_macro': '0.7243', 'eval_f1_macro': '0.7045', 'eval_runtime': '1.669', 'eval_samples_per_second': '842.4', 'eval_steps_per_second': '52.72', 'epoch': '2'}
{'loss': '0.7709', 'grad_norm': '24.37', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9127', 'eval_accuracy': '0.7077', 'eval_precision_macro': '0.7193', 'eval_recall_macro': '0.7196', 'eval_f1_macro': '0.7151', 'eval_runtime': '1.668', 'eval_samples_per_second': '842.8', 'eval_steps_per_second': '52.75', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9774', 'grad_norm': '10.77', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.728', 'eval_accuracy': '0.7511', 'eval_precision_macro': '0.7526', 'eval_recall_macro': '0.779', 'eval_f1_macro': '0.7613', 'eval_runtime': '1.724', 'eval_samples_per_second': '815.8', 'eval_steps_per_second': '51.06', 'epoch': '1'}
{'loss': '0.6855', 'grad_norm': '7.855', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7471', 'eval_accuracy': '0.7475', 'eval_precision_macro': '0.7502', 'eval_recall_macro': '0.7826', 'eval_f1_macro': '0.7553', 'eval_runtime': '1.716', 'eval_samples_per_second': '819.4', 'eval_steps_per_second': '51.28', 'epoch': '2'}
{'loss': '0.5423', 'grad_norm': '40.43', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.714', 'eval_accuracy': '0.7738', 'eval_precision_macro': '0.777', 'eval_recall_macro': '0.7907', 'eval_f1_macro': '0.7808', 'eval_runtime': '1.728', 'eval_samples_per_second': '813.8', 'eval_steps_per_second': '50.93', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.517', 'grad_norm': '56.68', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8406', 'eval_accuracy': '0.6991', 'eval_precision_macro': '0.7178', 'eval_recall_macro': '0.7011', 'eval_f1_macro': '0.7014', 'eval_runtime': '1.726', 'eval_samples_per_second': '814.8', 'eval_steps_per_second': '51', 'epoch': '1'}
{'loss': '0.933', 'grad_norm': '26.26', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6953', 'eval_accuracy': '0.7568', 'eval_precision_macro': '0.7561', 'eval_recall_macro': '0.7786', 'eval_f1_macro': '0.7637', 'eval_runtime': '1.692', 'eval_samples_per_second': '831.1', 'eval_steps_per_second': '52.02', 'epoch': '2'}
{'loss': '0.7744', 'grad_norm': '54.31', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6586', 'eval_accuracy': '0.7667', 'eval_precision_macro': '0.7709', 'eval_recall_macro': '0.7847', 'eval_f1_macro': '0.7755', 'eval_runtime': '1.741', 'eval_samples_per_second': '807.4', 'eval_steps_per_second': '50.53', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.303', 'grad_norm': '20.47', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9334', 'eval_accuracy': '0.7027', 'eval_precision_macro': '0.7026', 'eval_recall_macro': '0.7309', 'eval_f1_macro': '0.7087', 'eval_runtime': '1.667', 'eval_samples_per_second': '843.7', 'eval_steps_per_second': '52.81', 'epoch': '1'}
{'loss': '0.91', 'grad_norm': '22.2', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8962', 'eval_accuracy': '0.7183', 'eval_precision_macro': '0.7167', 'eval_recall_macro': '0.7514', 'eval_f1_macro': '0.7244', 'eval_runtime': '1.651', 'eval_samples_per_second': '851.7', 'eval_steps_per_second': '53.31', 'epoch': '2'}
{'loss': '0.7821', 'grad_norm': '41.47', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8277', 'eval_accuracy': '0.7347', 'eval_precision_macro': '0.736', 'eval_recall_macro': '0.7525', 'eval_f1_macro': '0.7406', 'eval_runtime': '1.654', 'eval_samples_per_second': '850', 'eval_steps_per_second': '53.2', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.004', 'grad_norm': '30.86', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.723', 'eval_accuracy': '0.7418', 'eval_precision_macro': '0.7363', 'eval_recall_macro': '0.7662', 'eval_f1_macro': '0.7468', 'eval_runtime': '1.735', 'eval_samples_per_second': '810.3', 'eval_steps_per_second': '50.71', 'epoch': '1'}
{'loss': '0.6636', 'grad_norm': '22.54', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.729', 'eval_accuracy': '0.7518', 'eval_precision_macro': '0.7504', 'eval_recall_macro': '0.7769', 'eval_f1_macro': '0.7583', 'eval_runtime': '1.74', 'eval_samples_per_second': '807.8', 'eval_steps_per_second': '50.56', 'epoch': '2'}
{'loss': '0.5456', 'grad_norm': '27.51', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8018', 'eval_accuracy': '0.739', 'eval_precision_macro': '0.7707', 'eval_recall_macro': '0.7352', 'eval_f1_macro': '0.7469', 'eval_runtime': '1.775', 'eval_samples_per_second': '792.3', 'eval_steps_per_second': '49.59', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.685', 'grad_norm': '123.7', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.322', 'eval_accuracy': '0.4957', 'eval_precision_macro': '0.5417', 'eval_recall_macro': '0.4892', 'eval_f1_macro': '0.4608', 'eval_runtime': '1.731', 'eval_samples_per_second': '812.1', 'eval_steps_per_second': '50.83', 'epoch': '1'}
{'loss': '1.227', 'grad_norm': '30.72', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8219', 'eval_accuracy': '0.7141', 'eval_precision_macro': '0.7144', 'eval_recall_macro': '0.7414', 'eval_f1_macro': '0.7212', 'eval_runtime': '1.704', 'eval_samples_per_second': '825', 'eval_steps_per_second': '51.63', 'epoch': '2'}
{'loss': '0.8607', 'grad_norm': '41.84', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7202', 'eval_accuracy': '0.7504', 'eval_precision_macro': '0.7621', 'eval_recall_macro': '0.7544', 'eval_f1_macro': '0.7571', 'eval_runtime': '1.77', 'eval_samples_per_second': '794.6', 'eval_steps_per_second': '49.73', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.281', 'grad_norm': '23.16', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9797', 'eval_accuracy': '0.6842', 'eval_precision_macro': '0.687', 'eval_recall_macro': '0.71', 'eval_f1_macro': '0.6913', 'eval_runtime': '1.651', 'eval_samples_per_second': '851.4', 'eval_steps_per_second': '53.29', 'epoch': '1'}
{'loss': '0.9154', 'grad_norm': '28.21', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8989', 'eval_accuracy': '0.6899', 'eval_precision_macro': '0.6936', 'eval_recall_macro': '0.7245', 'eval_f1_macro': '0.6988', 'eval_runtime': '1.673', 'eval_samples_per_second': '840.5', 'eval_steps_per_second': '52.6', 'epoch': '2'}
{'loss': '0.7674', 'grad_norm': '48.38', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9149', 'eval_accuracy': '0.7013', 'eval_precision_macro': '0.7149', 'eval_recall_macro': '0.7039', 'eval_f1_macro': '0.7039', 'eval_runtime': '1.637', 'eval_samples_per_second': '858.8', 'eval_steps_per_second': '53.75', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.004', 'grad_norm': '24.8', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6847', 'eval_accuracy': '0.763', 'eval_precision_macro': '0.7635', 'eval_recall_macro': '0.7889', 'eval_f1_macro': '0.7713', 'eval_runtime': '1.726', 'eval_samples_per_second': '814.1', 'eval_steps_per_second': '50.99', 'epoch': '1'}
{'loss': '0.6806', 'grad_norm': '23.13', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7084', 'eval_accuracy': '0.763', 'eval_precision_macro': '0.7637', 'eval_recall_macro': '0.7891', 'eval_f1_macro': '0.7714', 'eval_runtime': '1.778', 'eval_samples_per_second': '790', 'eval_steps_per_second': '49.48', 'epoch': '2'}
{'loss': '0.5509', 'grad_norm': '29.49', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.717', 'eval_accuracy': '0.7594', 'eval_precision_macro': '0.7596', 'eval_recall_macro': '0.7882', 'eval_f1_macro': '0.7681', 'eval_runtime': '1.852', 'eval_samples_per_second': '758.5', 'eval_steps_per_second': '47.51', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.537', 'grad_norm': '27.39', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8806', 'eval_accuracy': '0.7146', 'eval_precision_macro': '0.7192', 'eval_recall_macro': '0.7265', 'eval_f1_macro': '0.7178', 'eval_runtime': '1.8', 'eval_samples_per_second': '780.7', 'eval_steps_per_second': '48.9', 'epoch': '1'}
{'loss': '0.9445', 'grad_norm': '22.41', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6875', 'eval_accuracy': '0.7665', 'eval_precision_macro': '0.7633', 'eval_recall_macro': '0.7986', 'eval_f1_macro': '0.7747', 'eval_runtime': '1.743', 'eval_samples_per_second': '806', 'eval_steps_per_second': '50.48', 'epoch': '2'}
{'loss': '0.7861', 'grad_norm': '54.25', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6405', 'eval_accuracy': '0.7779', 'eval_precision_macro': '0.7776', 'eval_recall_macro': '0.8086', 'eval_f1_macro': '0.787', 'eval_runtime': '1.698', 'eval_samples_per_second': '827.5', 'eval_steps_per_second': '51.83', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.331', 'grad_norm': '26.95', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8989', 'eval_accuracy': '0.6961', 'eval_precision_macro': '0.7048', 'eval_recall_macro': '0.7129', 'eval_f1_macro': '0.6994', 'eval_runtime': '1.667', 'eval_samples_per_second': '842.8', 'eval_steps_per_second': '52.79', 'epoch': '1'}
{'loss': '0.9241', 'grad_norm': '24.35', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8459', 'eval_accuracy': '0.721', 'eval_precision_macro': '0.7235', 'eval_recall_macro': '0.742', 'eval_f1_macro': '0.7291', 'eval_runtime': '1.663', 'eval_samples_per_second': '845.1', 'eval_steps_per_second': '52.93', 'epoch': '2'}
{'loss': '0.7983', 'grad_norm': '29.84', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8572', 'eval_accuracy': '0.7231', 'eval_precision_macro': '0.7227', 'eval_recall_macro': '0.7553', 'eval_f1_macro': '0.731', 'eval_runtime': '1.66', 'eval_samples_per_second': '846.2', 'eval_steps_per_second': '53', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9905', 'grad_norm': '26.02', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8043', 'eval_accuracy': '0.7374', 'eval_precision_macro': '0.7395', 'eval_recall_macro': '0.7692', 'eval_f1_macro': '0.7486', 'eval_runtime': '1.786', 'eval_samples_per_second': '786.5', 'eval_steps_per_second': '49.26', 'epoch': '1'}
{'loss': '0.667', 'grad_norm': '29.73', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7958', 'eval_accuracy': '0.7473', 'eval_precision_macro': '0.7434', 'eval_recall_macro': '0.7662', 'eval_f1_macro': '0.7523', 'eval_runtime': '1.724', 'eval_samples_per_second': '814.9', 'eval_steps_per_second': '51.04', 'epoch': '2'}
{'loss': '0.5229', 'grad_norm': '19.67', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8367', 'eval_accuracy': '0.748', 'eval_precision_macro': '0.7474', 'eval_recall_macro': '0.7647', 'eval_f1_macro': '0.7526', 'eval_runtime': '1.849', 'eval_samples_per_second': '759.8', 'eval_steps_per_second': '47.59', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.542', 'grad_norm': '178.5', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.061', 'eval_accuracy': '0.652', 'eval_precision_macro': '0.6586', 'eval_recall_macro': '0.6715', 'eval_f1_macro': '0.6569', 'eval_runtime': '1.809', 'eval_samples_per_second': '776.8', 'eval_steps_per_second': '48.65', 'epoch': '1'}
{'loss': '0.9091', 'grad_norm': '53.69', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8363', 'eval_accuracy': '0.7238', 'eval_precision_macro': '0.7256', 'eval_recall_macro': '0.7589', 'eval_f1_macro': '0.7341', 'eval_runtime': '1.745', 'eval_samples_per_second': '805.3', 'eval_steps_per_second': '50.44', 'epoch': '2'}
{'loss': '0.763', 'grad_norm': '39.88', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8075', 'eval_accuracy': '0.7295', 'eval_precision_macro': '0.7331', 'eval_recall_macro': '0.7634', 'eval_f1_macro': '0.7393', 'eval_runtime': '1.71', 'eval_samples_per_second': '821.4', 'eval_steps_per_second': '51.45', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.326', 'grad_norm': '24.43', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.071', 'eval_accuracy': '0.6591', 'eval_precision_macro': '0.6616', 'eval_recall_macro': '0.6969', 'eval_f1_macro': '0.6686', 'eval_runtime': '1.818', 'eval_samples_per_second': '772.6', 'eval_steps_per_second': '48.39', 'epoch': '1'}
{'loss': '0.897', 'grad_norm': '22.46', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9656', 'eval_accuracy': '0.6868', 'eval_precision_macro': '0.6866', 'eval_recall_macro': '0.7222', 'eval_f1_macro': '0.6957', 'eval_runtime': '1.642', 'eval_samples_per_second': '855.6', 'eval_steps_per_second': '53.59', 'epoch': '2'}
{'loss': '0.7812', 'grad_norm': '21.74', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.967', 'eval_accuracy': '0.7004', 'eval_precision_macro': '0.7021', 'eval_recall_macro': '0.7337', 'eval_f1_macro': '0.7066', 'eval_runtime': '1.658', 'eval_samples_per_second': '847.3', 'eval_steps_per_second': '53.07', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)


===== TRAIN KFOLD k=6 fold=1 | IndoBERT =====


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9856', 'grad_norm': '16.79', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8732', 'eval_accuracy': '0.7193', 'eval_precision_macro': '0.7216', 'eval_recall_macro': '0.7687', 'eval_f1_macro': '0.7279', 'eval_runtime': '1.51', 'eval_samples_per_second': '776.1', 'eval_steps_per_second': '49', 'epoch': '1'}
{'loss': '0.6557', 'grad_norm': '18.89', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7723', 'eval_accuracy': '0.7526', 'eval_precision_macro': '0.7561', 'eval_recall_macro': '0.7698', 'eval_f1_macro': '0.7608', 'eval_runtime': '1.447', 'eval_samples_per_second': '809.8', 'eval_steps_per_second': '51.13', 'epoch': '2'}
{'loss': '0.5204', 'grad_norm': '24.33', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.831', 'eval_accuracy': '0.75', 'eval_precision_macro': '0.7511', 'eval_recall_macro': '0.7716', 'eval_f1_macro': '0.7586', 'eval_runtime': '1.491', 'eval_samples_per_second': '786', 'eval_steps_per_second': '49.63', 'epoch': '3'}
{'lo

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.494', 'grad_norm': '37.43', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8313', 'eval_accuracy': '0.7082', 'eval_precision_macro': '0.7111', 'eval_recall_macro': '0.7278', 'eval_f1_macro': '0.7157', 'eval_runtime': '1.49', 'eval_samples_per_second': '786.4', 'eval_steps_per_second': '49.65', 'epoch': '1'}
{'loss': '0.8787', 'grad_norm': '25.61', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7256', 'eval_accuracy': '0.75', 'eval_precision_macro': '0.7573', 'eval_recall_macro': '0.7682', 'eval_f1_macro': '0.7598', 'eval_runtime': '1.531', 'eval_samples_per_second': '765.4', 'eval_steps_per_second': '48.33', 'epoch': '2'}
{'loss': '0.762', 'grad_norm': '24.32', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7444', 'eval_accuracy': '0.7577', 'eval_precision_macro': '0.7578', 'eval_recall_macro': '0.7872', 'eval_f1_macro': '0.7669', 'eval_runtime': '1.529', 'eval_samples_per_second': '766.4', 'eval_steps_per_second': '48.39', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.287', 'grad_norm': '18.39', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.134', 'eval_accuracy': '0.6502', 'eval_precision_macro': '0.658', 'eval_recall_macro': '0.7029', 'eval_f1_macro': '0.6569', 'eval_runtime': '1.498', 'eval_samples_per_second': '782.5', 'eval_steps_per_second': '49.4', 'epoch': '1'}
{'loss': '0.903', 'grad_norm': '15.11', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8946', 'eval_accuracy': '0.7099', 'eval_precision_macro': '0.714', 'eval_recall_macro': '0.7367', 'eval_f1_macro': '0.7188', 'eval_runtime': '1.424', 'eval_samples_per_second': '823.1', 'eval_steps_per_second': '51.97', 'epoch': '2'}
{'loss': '0.7683', 'grad_norm': '19.76', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8896', 'eval_accuracy': '0.7108', 'eval_precision_macro': '0.7087', 'eval_recall_macro': '0.7381', 'eval_f1_macro': '0.7193', 'eval_runtime': '1.406', 'eval_samples_per_second': '833.7', 'eval_steps_per_second': '52.64', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.009', 'grad_norm': '22.16', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6945', 'eval_accuracy': '0.756', 'eval_precision_macro': '0.756', 'eval_recall_macro': '0.7871', 'eval_f1_macro': '0.7631', 'eval_runtime': '1.447', 'eval_samples_per_second': '809.8', 'eval_steps_per_second': '51.13', 'epoch': '1'}
{'loss': '0.6616', 'grad_norm': '18.97', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6571', 'eval_accuracy': '0.7747', 'eval_precision_macro': '0.7833', 'eval_recall_macro': '0.7906', 'eval_f1_macro': '0.7823', 'eval_runtime': '1.451', 'eval_samples_per_second': '807.6', 'eval_steps_per_second': '50.99', 'epoch': '2'}
{'loss': '0.5437', 'grad_norm': '15.25', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7035', 'eval_accuracy': '0.7696', 'eval_precision_macro': '0.7743', 'eval_recall_macro': '0.7822', 'eval_f1_macro': '0.7773', 'eval_runtime': '1.452', 'eval_samples_per_second': '807.3', 'eval_steps_per_second': '50.97', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.558', 'grad_norm': '34.84', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8473', 'eval_accuracy': '0.6937', 'eval_precision_macro': '0.7219', 'eval_recall_macro': '0.6892', 'eval_f1_macro': '0.692', 'eval_runtime': '1.471', 'eval_samples_per_second': '796.6', 'eval_steps_per_second': '50.3', 'epoch': '1'}
{'loss': '0.9316', 'grad_norm': '19.97', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6488', 'eval_accuracy': '0.7773', 'eval_precision_macro': '0.7811', 'eval_recall_macro': '0.7915', 'eval_f1_macro': '0.7848', 'eval_runtime': '1.506', 'eval_samples_per_second': '777.9', 'eval_steps_per_second': '49.12', 'epoch': '2'}
{'loss': '0.7973', 'grad_norm': '26.35', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6197', 'eval_accuracy': '0.7773', 'eval_precision_macro': '0.78', 'eval_recall_macro': '0.798', 'eval_f1_macro': '0.7872', 'eval_runtime': '1.437', 'eval_samples_per_second': '815.4', 'eval_steps_per_second': '51.48', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.381', 'grad_norm': '14.73', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.016', 'eval_accuracy': '0.6706', 'eval_precision_macro': '0.6786', 'eval_recall_macro': '0.7192', 'eval_f1_macro': '0.6774', 'eval_runtime': '1.436', 'eval_samples_per_second': '816.2', 'eval_steps_per_second': '51.53', 'epoch': '1'}
{'loss': '0.9256', 'grad_norm': '18.59', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8553', 'eval_accuracy': '0.7031', 'eval_precision_macro': '0.7204', 'eval_recall_macro': '0.7319', 'eval_f1_macro': '0.7145', 'eval_runtime': '1.405', 'eval_samples_per_second': '834', 'eval_steps_per_second': '52.66', 'epoch': '2'}
{'loss': '0.7787', 'grad_norm': '18.04', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8075', 'eval_accuracy': '0.7338', 'eval_precision_macro': '0.7382', 'eval_recall_macro': '0.7536', 'eval_f1_macro': '0.7441', 'eval_runtime': '1.481', 'eval_samples_per_second': '791.2', 'eval_steps_per_second': '49.95', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9683', 'grad_norm': '10.86', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7607', 'eval_accuracy': '0.7293', 'eval_precision_macro': '0.7346', 'eval_recall_macro': '0.7475', 'eval_f1_macro': '0.7349', 'eval_runtime': '1.489', 'eval_samples_per_second': '786.6', 'eval_steps_per_second': '49.71', 'epoch': '1'}
{'loss': '0.6669', 'grad_norm': '6.978', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7442', 'eval_accuracy': '0.7498', 'eval_precision_macro': '0.7467', 'eval_recall_macro': '0.7712', 'eval_f1_macro': '0.7545', 'eval_runtime': '1.476', 'eval_samples_per_second': '793.6', 'eval_steps_per_second': '50.15', 'epoch': '2'}
{'loss': '0.5336', 'grad_norm': '37.03', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7955', 'eval_accuracy': '0.7498', 'eval_precision_macro': '0.7442', 'eval_recall_macro': '0.771', 'eval_f1_macro': '0.7546', 'eval_runtime': '1.488', 'eval_samples_per_second': '787.2', 'eval_steps_per_second': '49.75', 'epoch': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.535', 'grad_norm': '29.28', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.911', 'eval_accuracy': '0.6968', 'eval_precision_macro': '0.6999', 'eval_recall_macro': '0.708', 'eval_f1_macro': '0.7012', 'eval_runtime': '1.428', 'eval_samples_per_second': '820.2', 'eval_steps_per_second': '51.83', 'epoch': '1'}
{'loss': '0.9286', 'grad_norm': '35.54', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7882', 'eval_accuracy': '0.7412', 'eval_precision_macro': '0.7415', 'eval_recall_macro': '0.7624', 'eval_f1_macro': '0.7479', 'eval_runtime': '1.438', 'eval_samples_per_second': '814.2', 'eval_steps_per_second': '51.45', 'epoch': '2'}
{'loss': '0.7506', 'grad_norm': '17.88', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7508', 'eval_accuracy': '0.7506', 'eval_precision_macro': '0.7508', 'eval_recall_macro': '0.7677', 'eval_f1_macro': '0.7572', 'eval_runtime': '1.493', 'eval_samples_per_second': '784.2', 'eval_steps_per_second': '49.56', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.304', 'grad_norm': '17.67', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.016', 'eval_accuracy': '0.6635', 'eval_precision_macro': '0.6644', 'eval_recall_macro': '0.6945', 'eval_f1_macro': '0.6713', 'eval_runtime': '1.436', 'eval_samples_per_second': '815.5', 'eval_steps_per_second': '51.53', 'epoch': '1'}
{'loss': '0.9009', 'grad_norm': '11.34', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9663', 'eval_accuracy': '0.6635', 'eval_precision_macro': '0.6643', 'eval_recall_macro': '0.6932', 'eval_f1_macro': '0.6705', 'eval_runtime': '1.391', 'eval_samples_per_second': '841.6', 'eval_steps_per_second': '53.19', 'epoch': '2'}
{'loss': '0.7732', 'grad_norm': '4.386', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9737', 'eval_accuracy': '0.6951', 'eval_precision_macro': '0.6918', 'eval_recall_macro': '0.7232', 'eval_f1_macro': '0.7019', 'eval_runtime': '1.386', 'eval_samples_per_second': '845.1', 'eval_steps_per_second': '53.4', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9798', 'grad_norm': '12.45', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6665', 'eval_accuracy': '0.7686', 'eval_precision_macro': '0.767', 'eval_recall_macro': '0.8007', 'eval_f1_macro': '0.7789', 'eval_runtime': '1.47', 'eval_samples_per_second': '796.9', 'eval_steps_per_second': '50.36', 'epoch': '1'}
{'loss': '0.6948', 'grad_norm': '7.98', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6163', 'eval_accuracy': '0.7771', 'eval_precision_macro': '0.7784', 'eval_recall_macro': '0.7995', 'eval_f1_macro': '0.7847', 'eval_runtime': '1.443', 'eval_samples_per_second': '811.6', 'eval_steps_per_second': '51.29', 'epoch': '2'}
{'loss': '0.5492', 'grad_norm': 'inf', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6669', 'eval_accuracy': '0.7737', 'eval_precision_macro': '0.7814', 'eval_recall_macro': '0.7816', 'eval_f1_macro': '0.78', 'eval_runtime': '1.444', 'eval_samples_per_second': '811.1', 'eval_steps_per_second': '51.26', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.505', 'grad_norm': '58.15', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8334', 'eval_accuracy': '0.7173', 'eval_precision_macro': '0.7204', 'eval_recall_macro': '0.7418', 'eval_f1_macro': '0.7253', 'eval_runtime': '1.704', 'eval_samples_per_second': '687.2', 'eval_steps_per_second': '43.43', 'epoch': '1'}
{'loss': '0.9007', 'grad_norm': '5.276', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6723', 'eval_accuracy': '0.7566', 'eval_precision_macro': '0.7555', 'eval_recall_macro': '0.7851', 'eval_f1_macro': '0.7658', 'eval_runtime': '1.447', 'eval_samples_per_second': '809.4', 'eval_steps_per_second': '51.15', 'epoch': '2'}
{'loss': '0.7594', 'grad_norm': '52.24', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6538', 'eval_accuracy': '0.7703', 'eval_precision_macro': '0.7729', 'eval_recall_macro': '0.7944', 'eval_f1_macro': '0.7804', 'eval_runtime': '1.452', 'eval_samples_per_second': '806.4', 'eval_steps_per_second': '50.96', 'epoch': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.287', 'grad_norm': '29.99', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8615', 'eval_accuracy': '0.7096', 'eval_precision_macro': '0.7064', 'eval_recall_macro': '0.7327', 'eval_f1_macro': '0.7145', 'eval_runtime': '1.409', 'eval_samples_per_second': '831', 'eval_steps_per_second': '52.52', 'epoch': '1'}
{'loss': '0.9037', 'grad_norm': '6.685', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8284', 'eval_accuracy': '0.7011', 'eval_precision_macro': '0.7029', 'eval_recall_macro': '0.7441', 'eval_f1_macro': '0.7096', 'eval_runtime': '1.402', 'eval_samples_per_second': '835.2', 'eval_steps_per_second': '52.78', 'epoch': '2'}
{'loss': '0.7757', 'grad_norm': '26.32', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7663', 'eval_accuracy': '0.7421', 'eval_precision_macro': '0.7428', 'eval_recall_macro': '0.7612', 'eval_f1_macro': '0.7495', 'eval_runtime': '1.414', 'eval_samples_per_second': '828.2', 'eval_steps_per_second': '52.34', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9737', 'grad_norm': '42.88', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7261', 'eval_accuracy': '0.7515', 'eval_precision_macro': '0.7577', 'eval_recall_macro': '0.7782', 'eval_f1_macro': '0.7628', 'eval_runtime': '1.553', 'eval_samples_per_second': '754', 'eval_steps_per_second': '47.65', 'epoch': '1'}
{'loss': '0.6668', 'grad_norm': '7.623', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7345', 'eval_accuracy': '0.7523', 'eval_precision_macro': '0.7572', 'eval_recall_macro': '0.7755', 'eval_f1_macro': '0.7611', 'eval_runtime': '1.442', 'eval_samples_per_second': '812.1', 'eval_steps_per_second': '51.32', 'epoch': '2'}
{'loss': '0.5426', 'grad_norm': '104.6', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7965', 'eval_accuracy': '0.7481', 'eval_precision_macro': '0.751', 'eval_recall_macro': '0.7719', 'eval_f1_macro': '0.7565', 'eval_runtime': '1.446', 'eval_samples_per_second': '809.6', 'eval_steps_per_second': '51.16', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.516', 'grad_norm': '120.5', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8868', 'eval_accuracy': '0.696', 'eval_precision_macro': '0.6999', 'eval_recall_macro': '0.7128', 'eval_f1_macro': '0.6983', 'eval_runtime': '1.513', 'eval_samples_per_second': '774.1', 'eval_steps_per_second': '48.92', 'epoch': '1'}
{'loss': '0.9174', 'grad_norm': '146.6', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7094', 'eval_accuracy': '0.7532', 'eval_precision_macro': '0.7533', 'eval_recall_macro': '0.7792', 'eval_f1_macro': '0.7633', 'eval_runtime': '1.433', 'eval_samples_per_second': '817.3', 'eval_steps_per_second': '51.65', 'epoch': '2'}
{'loss': '0.7748', 'grad_norm': '170.3', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7146', 'eval_accuracy': '0.7694', 'eval_precision_macro': '0.7718', 'eval_recall_macro': '0.7954', 'eval_f1_macro': '0.7795', 'eval_runtime': '1.436', 'eval_samples_per_second': '815.7', 'eval_steps_per_second': '51.55', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.328', 'grad_norm': '25.13', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9383', 'eval_accuracy': '0.6951', 'eval_precision_macro': '0.6917', 'eval_recall_macro': '0.7256', 'eval_f1_macro': '0.7003', 'eval_runtime': '1.429', 'eval_samples_per_second': '819.4', 'eval_steps_per_second': '51.78', 'epoch': '1'}
{'loss': '0.9066', 'grad_norm': '14.65', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.904', 'eval_accuracy': '0.6934', 'eval_precision_macro': '0.7077', 'eval_recall_macro': '0.7094', 'eval_f1_macro': '0.7009', 'eval_runtime': '1.498', 'eval_samples_per_second': '781.8', 'eval_steps_per_second': '49.41', 'epoch': '2'}
{'loss': '0.787', 'grad_norm': '23.38', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9295', 'eval_accuracy': '0.7028', 'eval_precision_macro': '0.7049', 'eval_recall_macro': '0.731', 'eval_f1_macro': '0.7124', 'eval_runtime': '1.387', 'eval_samples_per_second': '844', 'eval_steps_per_second': '53.33', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9772', 'grad_norm': 'inf', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7969', 'eval_accuracy': '0.7447', 'eval_precision_macro': '0.7437', 'eval_recall_macro': '0.7703', 'eval_f1_macro': '0.7524', 'eval_runtime': '1.513', 'eval_samples_per_second': '774.1', 'eval_steps_per_second': '48.92', 'epoch': '1'}
{'loss': '0.6589', 'grad_norm': '10.61', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8247', 'eval_accuracy': '0.7361', 'eval_precision_macro': '0.7393', 'eval_recall_macro': '0.7575', 'eval_f1_macro': '0.7412', 'eval_runtime': '1.446', 'eval_samples_per_second': '810', 'eval_steps_per_second': '51.19', 'epoch': '2'}
{'loss': '0.5473', 'grad_norm': '93.25', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8137', 'eval_accuracy': '0.7506', 'eval_precision_macro': '0.7538', 'eval_recall_macro': '0.7718', 'eval_f1_macro': '0.7604', 'eval_runtime': '1.445', 'eval_samples_per_second': '810.2', 'eval_steps_per_second': '51.2', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.523', 'grad_norm': '143.6', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9112', 'eval_accuracy': '0.6815', 'eval_precision_macro': '0.6824', 'eval_recall_macro': '0.7062', 'eval_f1_macro': '0.6886', 'eval_runtime': '1.544', 'eval_samples_per_second': '758.3', 'eval_steps_per_second': '47.92', 'epoch': '1'}
{'loss': '0.8917', 'grad_norm': '294.3', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7962', 'eval_accuracy': '0.731', 'eval_precision_macro': '0.7316', 'eval_recall_macro': '0.7583', 'eval_f1_macro': '0.7404', 'eval_runtime': '1.433', 'eval_samples_per_second': '817.4', 'eval_steps_per_second': '51.66', 'epoch': '2'}
{'loss': '0.7571', 'grad_norm': '127.3', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7865', 'eval_accuracy': '0.743', 'eval_precision_macro': '0.743', 'eval_recall_macro': '0.7715', 'eval_f1_macro': '0.7522', 'eval_runtime': '1.508', 'eval_samples_per_second': '776.5', 'eval_steps_per_second': '49.07', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.313', 'grad_norm': 'inf', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.027', 'eval_accuracy': '0.6781', 'eval_precision_macro': '0.6776', 'eval_recall_macro': '0.6997', 'eval_f1_macro': '0.682', 'eval_runtime': '1.371', 'eval_samples_per_second': '854.4', 'eval_steps_per_second': '53.99', 'epoch': '1'}
{'loss': '0.8971', 'grad_norm': '23.45', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.998', 'eval_accuracy': '0.6763', 'eval_precision_macro': '0.6801', 'eval_recall_macro': '0.7097', 'eval_f1_macro': '0.6833', 'eval_runtime': '1.398', 'eval_samples_per_second': '837.7', 'eval_steps_per_second': '52.94', 'epoch': '2'}
{'loss': '0.7624', 'grad_norm': '77.9', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9432', 'eval_accuracy': '0.6892', 'eval_precision_macro': '0.6915', 'eval_recall_macro': '0.7078', 'eval_f1_macro': '0.6965', 'eval_runtime': '1.458', 'eval_samples_per_second': '803.1', 'eval_steps_per_second': '50.75', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)


===== TRAIN KFOLD k=7 fold=1 | IndoBERT =====


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9744', 'grad_norm': '19.32', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7795', 'eval_accuracy': '0.738', 'eval_precision_macro': '0.7491', 'eval_recall_macro': '0.7494', 'eval_f1_macro': '0.7452', 'eval_runtime': '1.248', 'eval_samples_per_second': '804.7', 'eval_steps_per_second': '50.5', 'epoch': '1'}
{'loss': '0.6563', 'grad_norm': '17.72', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8362', 'eval_accuracy': '0.738', 'eval_precision_macro': '0.7395', 'eval_recall_macro': '0.7695', 'eval_f1_macro': '0.7481', 'eval_runtime': '1.423', 'eval_samples_per_second': '705.6', 'eval_steps_per_second': '44.27', 'epoch': '2'}
{'loss': '0.5322', 'grad_norm': '27.84', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8087', 'eval_accuracy': '0.745', 'eval_precision_macro': '0.7488', 'eval_recall_macro': '0.7545', 'eval_f1_macro': '0.7506', 'eval_runtime': '1.256', 'eval_samples_per_second': '799.4', 'eval_steps_per_second': '50.16', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.52', 'grad_norm': '30.37', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8856', 'eval_accuracy': '0.6982', 'eval_precision_macro': '0.7042', 'eval_recall_macro': '0.7049', 'eval_f1_macro': '0.7037', 'eval_runtime': '1.456', 'eval_samples_per_second': '689.7', 'eval_steps_per_second': '43.28', 'epoch': '1'}
{'loss': '0.925', 'grad_norm': '41.22', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7968', 'eval_accuracy': '0.743', 'eval_precision_macro': '0.7402', 'eval_recall_macro': '0.7686', 'eval_f1_macro': '0.7505', 'eval_runtime': '1.225', 'eval_samples_per_second': '819.9', 'eval_steps_per_second': '51.45', 'epoch': '2'}
{'loss': '0.7711', 'grad_norm': '73.78', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7565', 'eval_accuracy': '0.75', 'eval_precision_macro': '0.7466', 'eval_recall_macro': '0.7798', 'eval_f1_macro': '0.7588', 'eval_runtime': '1.243', 'eval_samples_per_second': '807.8', 'eval_steps_per_second': '50.69', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.301', 'grad_norm': '17.62', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.056', 'eval_accuracy': '0.6295', 'eval_precision_macro': '0.6384', 'eval_recall_macro': '0.6591', 'eval_f1_macro': '0.6398', 'eval_runtime': '1.206', 'eval_samples_per_second': '832.8', 'eval_steps_per_second': '52.26', 'epoch': '1'}
{'loss': '0.8959', 'grad_norm': '17.42', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9479', 'eval_accuracy': '0.6952', 'eval_precision_macro': '0.6949', 'eval_recall_macro': '0.7314', 'eval_f1_macro': '0.7042', 'eval_runtime': '1.186', 'eval_samples_per_second': '846.4', 'eval_steps_per_second': '53.11', 'epoch': '2'}
{'loss': '0.7763', 'grad_norm': '23.62', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8815', 'eval_accuracy': '0.7241', 'eval_precision_macro': '0.7248', 'eval_recall_macro': '0.7485', 'eval_f1_macro': '0.7343', 'eval_runtime': '1.181', 'eval_samples_per_second': '850.1', 'eval_steps_per_second': '53.34', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9957', 'grad_norm': '17.61', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6371', 'eval_accuracy': '0.7789', 'eval_precision_macro': '0.7822', 'eval_recall_macro': '0.7871', 'eval_f1_macro': '0.7831', 'eval_runtime': '1.41', 'eval_samples_per_second': '712.2', 'eval_steps_per_second': '44.69', 'epoch': '1'}
{'loss': '0.6803', 'grad_norm': '16.84', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.672', 'eval_accuracy': '0.7769', 'eval_precision_macro': '0.7717', 'eval_recall_macro': '0.8087', 'eval_f1_macro': '0.7831', 'eval_runtime': '1.252', 'eval_samples_per_second': '801.7', 'eval_steps_per_second': '50.3', 'epoch': '2'}
{'loss': '0.5447', 'grad_norm': '22.28', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6513', 'eval_accuracy': '0.7849', 'eval_precision_macro': '0.7901', 'eval_recall_macro': '0.7971', 'eval_f1_macro': '0.7919', 'eval_runtime': '1.227', 'eval_samples_per_second': '818.5', 'eval_steps_per_second': '51.36', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.545', 'grad_norm': '47.21', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8615', 'eval_accuracy': '0.7042', 'eval_precision_macro': '0.709', 'eval_recall_macro': '0.7148', 'eval_f1_macro': '0.7087', 'eval_runtime': '1.234', 'eval_samples_per_second': '813.8', 'eval_steps_per_second': '51.07', 'epoch': '1'}
{'loss': '0.9339', 'grad_norm': '71.02', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6812', 'eval_accuracy': '0.7639', 'eval_precision_macro': '0.7634', 'eval_recall_macro': '0.7867', 'eval_f1_macro': '0.7697', 'eval_runtime': '1.223', 'eval_samples_per_second': '820.7', 'eval_steps_per_second': '51.49', 'epoch': '2'}
{'loss': '0.7742', 'grad_norm': '29.3', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6608', 'eval_accuracy': '0.7779', 'eval_precision_macro': '0.7792', 'eval_recall_macro': '0.8032', 'eval_f1_macro': '0.7859', 'eval_runtime': '1.232', 'eval_samples_per_second': '815.2', 'eval_steps_per_second': '51.15', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.31', 'grad_norm': '17.63', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9001', 'eval_accuracy': '0.7002', 'eval_precision_macro': '0.7084', 'eval_recall_macro': '0.7234', 'eval_f1_macro': '0.7066', 'eval_runtime': '1.198', 'eval_samples_per_second': '838.3', 'eval_steps_per_second': '52.6', 'epoch': '1'}
{'loss': '0.9151', 'grad_norm': '21.09', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8635', 'eval_accuracy': '0.7161', 'eval_precision_macro': '0.7172', 'eval_recall_macro': '0.7521', 'eval_f1_macro': '0.7219', 'eval_runtime': '1.253', 'eval_samples_per_second': '801.2', 'eval_steps_per_second': '50.28', 'epoch': '2'}
{'loss': '0.7665', 'grad_norm': '30.11', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8105', 'eval_accuracy': '0.7371', 'eval_precision_macro': '0.7409', 'eval_recall_macro': '0.7549', 'eval_f1_macro': '0.7433', 'eval_runtime': '1.23', 'eval_samples_per_second': '816.2', 'eval_steps_per_second': '51.22', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.012', 'grad_norm': '27.72', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6638', 'eval_accuracy': '0.76', 'eval_precision_macro': '0.7777', 'eval_recall_macro': '0.7578', 'eval_f1_macro': '0.7659', 'eval_runtime': '1.334', 'eval_samples_per_second': '752.6', 'eval_steps_per_second': '47.23', 'epoch': '1'}
{'loss': '0.6674', 'grad_norm': '9.205', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8094', 'eval_accuracy': '0.754', 'eval_precision_macro': '0.7509', 'eval_recall_macro': '0.7844', 'eval_f1_macro': '0.7579', 'eval_runtime': '1.308', 'eval_samples_per_second': '767.4', 'eval_steps_per_second': '48.15', 'epoch': '2'}
{'loss': '0.5292', 'grad_norm': '5.698', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7387', 'eval_accuracy': '0.7749', 'eval_precision_macro': '0.7759', 'eval_recall_macro': '0.7869', 'eval_f1_macro': '0.7791', 'eval_runtime': '1.243', 'eval_samples_per_second': '807.8', 'eval_steps_per_second': '50.69', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.55', 'grad_norm': '48.69', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9595', 'eval_accuracy': '0.6743', 'eval_precision_macro': '0.673', 'eval_recall_macro': '0.685', 'eval_f1_macro': '0.6725', 'eval_runtime': '1.249', 'eval_samples_per_second': '804', 'eval_steps_per_second': '50.45', 'epoch': '1'}
{'loss': '0.9329', 'grad_norm': '23.26', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7712', 'eval_accuracy': '0.741', 'eval_precision_macro': '0.7368', 'eval_recall_macro': '0.771', 'eval_f1_macro': '0.7475', 'eval_runtime': '1.294', 'eval_samples_per_second': '775.6', 'eval_steps_per_second': '48.67', 'epoch': '2'}
{'loss': '0.7672', 'grad_norm': '32.63', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7566', 'eval_accuracy': '0.741', 'eval_precision_macro': '0.7361', 'eval_recall_macro': '0.7743', 'eval_f1_macro': '0.7466', 'eval_runtime': '1.218', 'eval_samples_per_second': '824.3', 'eval_steps_per_second': '51.73', 'epoch': '3'}
{'lo

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.291', 'grad_norm': '23.4', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9427', 'eval_accuracy': '0.6912', 'eval_precision_macro': '0.6996', 'eval_recall_macro': '0.6996', 'eval_f1_macro': '0.6971', 'eval_runtime': '1.229', 'eval_samples_per_second': '816.9', 'eval_steps_per_second': '51.26', 'epoch': '1'}
{'loss': '0.9046', 'grad_norm': '11.07', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9171', 'eval_accuracy': '0.6863', 'eval_precision_macro': '0.6867', 'eval_recall_macro': '0.724', 'eval_f1_macro': '0.6942', 'eval_runtime': '1.265', 'eval_samples_per_second': '793.6', 'eval_steps_per_second': '49.8', 'epoch': '2'}
{'loss': '0.7766', 'grad_norm': '16.09', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8712', 'eval_accuracy': '0.7141', 'eval_precision_macro': '0.7198', 'eval_recall_macro': '0.7401', 'eval_f1_macro': '0.7212', 'eval_runtime': '1.193', 'eval_samples_per_second': '841.3', 'eval_steps_per_second': '52.79', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9876', 'grad_norm': '31.08', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7579', 'eval_accuracy': '0.7191', 'eval_precision_macro': '0.7287', 'eval_recall_macro': '0.7271', 'eval_f1_macro': '0.7201', 'eval_runtime': '1.337', 'eval_samples_per_second': '750.9', 'eval_steps_per_second': '47.12', 'epoch': '1'}
{'loss': '0.6727', 'grad_norm': '16.06', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7128', 'eval_accuracy': '0.75', 'eval_precision_macro': '0.746', 'eval_recall_macro': '0.7742', 'eval_f1_macro': '0.7558', 'eval_runtime': '1.251', 'eval_samples_per_second': '802.3', 'eval_steps_per_second': '50.35', 'epoch': '2'}
{'loss': '0.538', 'grad_norm': '9.448', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8111', 'eval_accuracy': '0.742', 'eval_precision_macro': '0.7483', 'eval_recall_macro': '0.7515', 'eval_f1_macro': '0.7467', 'eval_runtime': '1.254', 'eval_samples_per_second': '800.9', 'eval_steps_per_second': '50.25', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.503', 'grad_norm': '33.99', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8681', 'eval_accuracy': '0.6972', 'eval_precision_macro': '0.7017', 'eval_recall_macro': '0.7084', 'eval_f1_macro': '0.6998', 'eval_runtime': '1.423', 'eval_samples_per_second': '705.3', 'eval_steps_per_second': '44.26', 'epoch': '1'}
{'loss': '0.8749', 'grad_norm': '506.9', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7476', 'eval_accuracy': '0.744', 'eval_precision_macro': '0.7408', 'eval_recall_macro': '0.7686', 'eval_f1_macro': '0.7505', 'eval_runtime': '1.3', 'eval_samples_per_second': '772.3', 'eval_steps_per_second': '48.46', 'epoch': '2'}
{'loss': '0.7479', 'grad_norm': '14.3', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7157', 'eval_accuracy': '0.752', 'eval_precision_macro': '0.748', 'eval_recall_macro': '0.7782', 'eval_f1_macro': '0.7581', 'eval_runtime': '1.23', 'eval_samples_per_second': '816.1', 'eval_steps_per_second': '51.21', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.285', 'grad_norm': '27.18', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9936', 'eval_accuracy': '0.6653', 'eval_precision_macro': '0.6899', 'eval_recall_macro': '0.6632', 'eval_f1_macro': '0.6694', 'eval_runtime': '1.244', 'eval_samples_per_second': '807.2', 'eval_steps_per_second': '50.65', 'epoch': '1'}
{'loss': '0.9041', 'grad_norm': '16.44', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '1.009', 'eval_accuracy': '0.6793', 'eval_precision_macro': '0.6828', 'eval_recall_macro': '0.7241', 'eval_f1_macro': '0.686', 'eval_runtime': '1.202', 'eval_samples_per_second': '835.2', 'eval_steps_per_second': '52.41', 'epoch': '2'}
{'loss': '0.7691', 'grad_norm': '28.03', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9122', 'eval_accuracy': '0.6853', 'eval_precision_macro': '0.6889', 'eval_recall_macro': '0.7013', 'eval_f1_macro': '0.6934', 'eval_runtime': '1.189', 'eval_samples_per_second': '844.4', 'eval_steps_per_second': '52.98', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.026', 'grad_norm': '28.67', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6109', 'eval_accuracy': '0.7978', 'eval_precision_macro': '0.802', 'eval_recall_macro': '0.8127', 'eval_f1_macro': '0.8054', 'eval_runtime': '1.239', 'eval_samples_per_second': '810.5', 'eval_steps_per_second': '50.86', 'epoch': '1'}
{'loss': '0.6876', 'grad_norm': '24.1', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6885', 'eval_accuracy': '0.7779', 'eval_precision_macro': '0.7786', 'eval_recall_macro': '0.8041', 'eval_f1_macro': '0.7856', 'eval_runtime': '1.238', 'eval_samples_per_second': '811', 'eval_steps_per_second': '50.89', 'epoch': '2'}
{'loss': '0.5546', 'grad_norm': '17.02', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6905', 'eval_accuracy': '0.7878', 'eval_precision_macro': '0.7899', 'eval_recall_macro': '0.8048', 'eval_f1_macro': '0.7947', 'eval_runtime': '1.246', 'eval_samples_per_second': '806.1', 'eval_steps_per_second': '50.58', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.539', 'grad_norm': '68.54', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8095', 'eval_accuracy': '0.7092', 'eval_precision_macro': '0.7152', 'eval_recall_macro': '0.7288', 'eval_f1_macro': '0.7154', 'eval_runtime': '1.285', 'eval_samples_per_second': '781.6', 'eval_steps_per_second': '49.04', 'epoch': '1'}
{'loss': '0.9098', 'grad_norm': '28.69', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6436', 'eval_accuracy': '0.7729', 'eval_precision_macro': '0.769', 'eval_recall_macro': '0.8085', 'eval_f1_macro': '0.7822', 'eval_runtime': '1.217', 'eval_samples_per_second': '825.2', 'eval_steps_per_second': '51.78', 'epoch': '2'}
{'loss': '0.7836', 'grad_norm': '21.58', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6192', 'eval_accuracy': '0.7898', 'eval_precision_macro': '0.7888', 'eval_recall_macro': '0.8264', 'eval_f1_macro': '0.8012', 'eval_runtime': '1.334', 'eval_samples_per_second': '752.4', 'eval_steps_per_second': '47.21', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.31', 'grad_norm': '25.37', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8561', 'eval_accuracy': '0.7201', 'eval_precision_macro': '0.7412', 'eval_recall_macro': '0.7189', 'eval_f1_macro': '0.7246', 'eval_runtime': '1.234', 'eval_samples_per_second': '813.9', 'eval_steps_per_second': '51.07', 'epoch': '1'}
{'loss': '0.9122', 'grad_norm': '21.71', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8161', 'eval_accuracy': '0.74', 'eval_precision_macro': '0.736', 'eval_recall_macro': '0.7621', 'eval_f1_macro': '0.7439', 'eval_runtime': '1.185', 'eval_samples_per_second': '846.9', 'eval_steps_per_second': '53.14', 'epoch': '2'}
{'loss': '0.7964', 'grad_norm': '20.41', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7652', 'eval_accuracy': '0.746', 'eval_precision_macro': '0.7615', 'eval_recall_macro': '0.7543', 'eval_f1_macro': '0.7547', 'eval_runtime': '1.191', 'eval_samples_per_second': '843.1', 'eval_steps_per_second': '52.9', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9774', 'grad_norm': '21.93', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7446', 'eval_accuracy': '0.7221', 'eval_precision_macro': '0.739', 'eval_recall_macro': '0.7384', 'eval_f1_macro': '0.7345', 'eval_runtime': '1.236', 'eval_samples_per_second': '812.5', 'eval_steps_per_second': '50.98', 'epoch': '1'}
{'loss': '0.6562', 'grad_norm': '18.34', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7524', 'eval_accuracy': '0.749', 'eval_precision_macro': '0.7576', 'eval_recall_macro': '0.7652', 'eval_f1_macro': '0.7594', 'eval_runtime': '1.398', 'eval_samples_per_second': '718.4', 'eval_steps_per_second': '45.08', 'epoch': '2'}
{'loss': '0.5519', 'grad_norm': '27.66', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7547', 'eval_accuracy': '0.749', 'eval_precision_macro': '0.7503', 'eval_recall_macro': '0.7672', 'eval_f1_macro': '0.7572', 'eval_runtime': '1.242', 'eval_samples_per_second': '808.4', 'eval_steps_per_second': '50.73', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.497', 'grad_norm': '129.5', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8357', 'eval_accuracy': '0.7082', 'eval_precision_macro': '0.7074', 'eval_recall_macro': '0.734', 'eval_f1_macro': '0.718', 'eval_runtime': '1.321', 'eval_samples_per_second': '759.8', 'eval_steps_per_second': '47.67', 'epoch': '1'}
{'loss': '0.8811', 'grad_norm': '36.62', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7131', 'eval_accuracy': '0.757', 'eval_precision_macro': '0.7553', 'eval_recall_macro': '0.7873', 'eval_f1_macro': '0.7674', 'eval_runtime': '1.232', 'eval_samples_per_second': '815', 'eval_steps_per_second': '51.14', 'epoch': '2'}
{'loss': '0.7508', 'grad_norm': '19.52', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7167', 'eval_accuracy': '0.76', 'eval_precision_macro': '0.7593', 'eval_recall_macro': '0.7902', 'eval_f1_macro': '0.7697', 'eval_runtime': '1.23', 'eval_samples_per_second': '816.2', 'eval_steps_per_second': '51.22', 'epoch': '3'}
{'lo

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.29', 'grad_norm': '19.05', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9451', 'eval_accuracy': '0.6743', 'eval_precision_macro': '0.6812', 'eval_recall_macro': '0.6856', 'eval_f1_macro': '0.681', 'eval_runtime': '1.264', 'eval_samples_per_second': '794', 'eval_steps_per_second': '49.82', 'epoch': '1'}
{'loss': '0.9095', 'grad_norm': '23.65', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9692', 'eval_accuracy': '0.6932', 'eval_precision_macro': '0.6978', 'eval_recall_macro': '0.7285', 'eval_f1_macro': '0.702', 'eval_runtime': '1.188', 'eval_samples_per_second': '845.1', 'eval_steps_per_second': '53.03', 'epoch': '2'}
{'loss': '0.7756', 'grad_norm': '23.32', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8635', 'eval_accuracy': '0.7161', 'eval_precision_macro': '0.7136', 'eval_recall_macro': '0.7454', 'eval_f1_macro': '0.7242', 'eval_runtime': '1.227', 'eval_samples_per_second': '818.1', 'eval_steps_per_second': '51.34', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9875', 'grad_norm': '25.42', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.734', 'eval_accuracy': '0.738', 'eval_precision_macro': '0.7401', 'eval_recall_macro': '0.7511', 'eval_f1_macro': '0.7426', 'eval_runtime': '1.258', 'eval_samples_per_second': '798.1', 'eval_steps_per_second': '50.08', 'epoch': '1'}
{'loss': '0.6677', 'grad_norm': '12.04', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8249', 'eval_accuracy': '0.738', 'eval_precision_macro': '0.7349', 'eval_recall_macro': '0.7725', 'eval_f1_macro': '0.7425', 'eval_runtime': '1.226', 'eval_samples_per_second': '818.8', 'eval_steps_per_second': '51.38', 'epoch': '2'}
{'loss': '0.5287', 'grad_norm': '37.22', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8682', 'eval_accuracy': '0.739', 'eval_precision_macro': '0.7447', 'eval_recall_macro': '0.7613', 'eval_f1_macro': '0.7446', 'eval_runtime': '1.252', 'eval_samples_per_second': '801.7', 'eval_steps_per_second': '50.31', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.556', 'grad_norm': '30.99', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9456', 'eval_accuracy': '0.6653', 'eval_precision_macro': '0.6661', 'eval_recall_macro': '0.6871', 'eval_f1_macro': '0.6712', 'eval_runtime': '1.243', 'eval_samples_per_second': '807.8', 'eval_steps_per_second': '50.69', 'epoch': '1'}
{'loss': '0.9244', 'grad_norm': '22.47', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8482', 'eval_accuracy': '0.7181', 'eval_precision_macro': '0.7157', 'eval_recall_macro': '0.7498', 'eval_f1_macro': '0.7233', 'eval_runtime': '1.267', 'eval_samples_per_second': '792.4', 'eval_steps_per_second': '49.72', 'epoch': '2'}
{'loss': '0.7838', 'grad_norm': '86.34', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8385', 'eval_accuracy': '0.7201', 'eval_precision_macro': '0.7231', 'eval_recall_macro': '0.7652', 'eval_f1_macro': '0.7305', 'eval_runtime': '1.238', 'eval_samples_per_second': '811.3', 'eval_steps_per_second': '50.91', 'epoch': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.308', 'grad_norm': '28.56', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9507', 'eval_accuracy': '0.6703', 'eval_precision_macro': '0.6778', 'eval_recall_macro': '0.6794', 'eval_f1_macro': '0.6755', 'eval_runtime': '1.194', 'eval_samples_per_second': '840.5', 'eval_steps_per_second': '52.74', 'epoch': '1'}
{'loss': '0.8872', 'grad_norm': '16.47', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9648', 'eval_accuracy': '0.6932', 'eval_precision_macro': '0.6991', 'eval_recall_macro': '0.7267', 'eval_f1_macro': '0.6997', 'eval_runtime': '1.194', 'eval_samples_per_second': '840.9', 'eval_steps_per_second': '52.77', 'epoch': '2'}
{'loss': '0.7632', 'grad_norm': '38.07', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9505', 'eval_accuracy': '0.7012', 'eval_precision_macro': '0.7131', 'eval_recall_macro': '0.7269', 'eval_f1_macro': '0.7083', 'eval_runtime': '1.196', 'eval_samples_per_second': '839.6', 'eval_steps_per_second': '52.68', 'epoch': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoBERTweet (indolem/indobertweet-base-uncased) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)

--- IndoRoBERTa (cahya/roberta-base-indonesian-522M) ---
input_ids: (1, 128)
attention_mask: (1, 128)


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

last_hidden_state: (1, 128, 768)


===== TRAIN KFOLD k=8 fold=1 | IndoBERT =====


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9624', 'grad_norm': '31.97', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8524', 'eval_accuracy': '0.7281', 'eval_precision_macro': '0.7258', 'eval_recall_macro': '0.761', 'eval_f1_macro': '0.7366', 'eval_runtime': '1.117', 'eval_samples_per_second': '787.2', 'eval_steps_per_second': '49.26', 'epoch': '1'}
{'loss': '0.6678', 'grad_norm': '13.82', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7944', 'eval_accuracy': '0.7486', 'eval_precision_macro': '0.748', 'eval_recall_macro': '0.7767', 'eval_f1_macro': '0.7562', 'eval_runtime': '1.086', 'eval_samples_per_second': '809.7', 'eval_steps_per_second': '50.66', 'epoch': '2'}
{'loss': '0.5222', 'grad_norm': '27.28', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8794', 'eval_accuracy': '0.7338', 'eval_precision_macro': '0.7349', 'eval_recall_macro': '0.7476', 'eval_f1_macro': '0.7406', 'eval_runtime': '1.116', 'eval_samples_per_second': '787.8', 'eval_steps_per_second': '49.3', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.535', 'grad_norm': '31.71', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9541', 'eval_accuracy': '0.6792', 'eval_precision_macro': '0.6868', 'eval_recall_macro': '0.6963', 'eval_f1_macro': '0.6798', 'eval_runtime': '1.078', 'eval_samples_per_second': '815.5', 'eval_steps_per_second': '51.03', 'epoch': '1'}
{'loss': '0.9169', 'grad_norm': '9.404', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.782', 'eval_accuracy': '0.7395', 'eval_precision_macro': '0.7365', 'eval_recall_macro': '0.7665', 'eval_f1_macro': '0.7471', 'eval_runtime': '1.097', 'eval_samples_per_second': '801.5', 'eval_steps_per_second': '50.15', 'epoch': '2'}
{'loss': '0.7488', 'grad_norm': '30.54', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7692', 'eval_accuracy': '0.7349', 'eval_precision_macro': '0.7417', 'eval_recall_macro': '0.7665', 'eval_f1_macro': '0.7467', 'eval_runtime': '1.074', 'eval_samples_per_second': '818.3', 'eval_steps_per_second': '51.2', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.279', 'grad_norm': 'inf', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9845', 'eval_accuracy': '0.6746', 'eval_precision_macro': '0.6888', 'eval_recall_macro': '0.6913', 'eval_f1_macro': '0.6864', 'eval_runtime': '1.061', 'eval_samples_per_second': '828.5', 'eval_steps_per_second': '51.84', 'epoch': '1'}
{'loss': '0.8941', 'grad_norm': '35.46', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8794', 'eval_accuracy': '0.7099', 'eval_precision_macro': '0.7217', 'eval_recall_macro': '0.7279', 'eval_f1_macro': '0.7221', 'eval_runtime': '1.048', 'eval_samples_per_second': '838.4', 'eval_steps_per_second': '52.46', 'epoch': '2'}
{'loss': '0.7723', 'grad_norm': '53.16', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9178', 'eval_accuracy': '0.7042', 'eval_precision_macro': '0.706', 'eval_recall_macro': '0.7299', 'eval_f1_macro': '0.7151', 'eval_runtime': '1.038', 'eval_samples_per_second': '847.2', 'eval_steps_per_second': '53.01', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9935', 'grad_norm': '38.66', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.758', 'eval_accuracy': '0.752', 'eval_precision_macro': '0.7631', 'eval_recall_macro': '0.7687', 'eval_f1_macro': '0.7567', 'eval_runtime': '1.085', 'eval_samples_per_second': '809.8', 'eval_steps_per_second': '50.67', 'epoch': '1'}
{'loss': '0.6694', 'grad_norm': '8.33', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6965', 'eval_accuracy': '0.7565', 'eval_precision_macro': '0.7656', 'eval_recall_macro': '0.7747', 'eval_f1_macro': '0.7651', 'eval_runtime': '1.086', 'eval_samples_per_second': '809.3', 'eval_steps_per_second': '50.64', 'epoch': '2'}
{'loss': '0.5376', 'grad_norm': '27.77', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7524', 'eval_accuracy': '0.7725', 'eval_precision_macro': '0.7777', 'eval_recall_macro': '0.7919', 'eval_f1_macro': '0.7794', 'eval_runtime': '1.082', 'eval_samples_per_second': '812.5', 'eval_steps_per_second': '50.84', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.498', 'grad_norm': '37.45', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8516', 'eval_accuracy': '0.6894', 'eval_precision_macro': '0.6904', 'eval_recall_macro': '0.7111', 'eval_f1_macro': '0.6933', 'eval_runtime': '1.072', 'eval_samples_per_second': '819.7', 'eval_steps_per_second': '51.29', 'epoch': '1'}
{'loss': '0.8995', 'grad_norm': '8.503', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6998', 'eval_accuracy': '0.7645', 'eval_precision_macro': '0.762', 'eval_recall_macro': '0.7946', 'eval_f1_macro': '0.7722', 'eval_runtime': '1.085', 'eval_samples_per_second': '810', 'eval_steps_per_second': '50.68', 'epoch': '2'}
{'loss': '0.7657', 'grad_norm': '23.58', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.69', 'eval_accuracy': '0.7622', 'eval_precision_macro': '0.7632', 'eval_recall_macro': '0.794', 'eval_f1_macro': '0.7717', 'eval_runtime': '1.078', 'eval_samples_per_second': '815.4', 'eval_steps_per_second': '51.02', 'epoch': '3'}
{'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.277', 'grad_norm': 'inf', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9377', 'eval_accuracy': '0.6815', 'eval_precision_macro': '0.6902', 'eval_recall_macro': '0.7036', 'eval_f1_macro': '0.6865', 'eval_runtime': '1.055', 'eval_samples_per_second': '833.5', 'eval_steps_per_second': '52.15', 'epoch': '1'}
{'loss': '0.8892', 'grad_norm': '11.64', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8151', 'eval_accuracy': '0.7179', 'eval_precision_macro': '0.7142', 'eval_recall_macro': '0.7459', 'eval_f1_macro': '0.7243', 'eval_runtime': '1.104', 'eval_samples_per_second': '796.2', 'eval_steps_per_second': '49.82', 'epoch': '2'}
{'loss': '0.7769', 'grad_norm': '36.53', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9134', 'eval_accuracy': '0.7065', 'eval_precision_macro': '0.7099', 'eval_recall_macro': '0.7336', 'eval_f1_macro': '0.7101', 'eval_runtime': '1.153', 'eval_samples_per_second': '762.7', 'eval_steps_per_second': '47.72', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9892', 'grad_norm': '23.08', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.694', 'eval_accuracy': '0.7611', 'eval_precision_macro': '0.7573', 'eval_recall_macro': '0.7788', 'eval_f1_macro': '0.7658', 'eval_runtime': '1.124', 'eval_samples_per_second': '782.2', 'eval_steps_per_second': '48.94', 'epoch': '1'}
{'loss': '0.6802', 'grad_norm': '13.67', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7015', 'eval_accuracy': '0.7668', 'eval_precision_macro': '0.7604', 'eval_recall_macro': '0.7916', 'eval_f1_macro': '0.7701', 'eval_runtime': '1.146', 'eval_samples_per_second': '767.2', 'eval_steps_per_second': '48', 'epoch': '2'}
{'loss': '0.5272', 'grad_norm': '29.75', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7303', 'eval_accuracy': '0.7759', 'eval_precision_macro': '0.7731', 'eval_recall_macro': '0.792', 'eval_f1_macro': '0.7803', 'eval_runtime': '1.123', 'eval_samples_per_second': '782.9', 'eval_steps_per_second': '48.99', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.545', 'grad_norm': '49.34', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8862', 'eval_accuracy': '0.7053', 'eval_precision_macro': '0.7043', 'eval_recall_macro': '0.7203', 'eval_f1_macro': '0.708', 'eval_runtime': '1.149', 'eval_samples_per_second': '764.8', 'eval_steps_per_second': '47.85', 'epoch': '1'}
{'loss': '0.9572', 'grad_norm': '13.45', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7646', 'eval_accuracy': '0.7452', 'eval_precision_macro': '0.7396', 'eval_recall_macro': '0.7753', 'eval_f1_macro': '0.7511', 'eval_runtime': '1.13', 'eval_samples_per_second': '778', 'eval_steps_per_second': '48.68', 'epoch': '2'}
{'loss': '0.7837', 'grad_norm': '35.88', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7339', 'eval_accuracy': '0.76', 'eval_precision_macro': '0.757', 'eval_recall_macro': '0.7955', 'eval_f1_macro': '0.7662', 'eval_runtime': '1.175', 'eval_samples_per_second': '748', 'eval_steps_per_second': '46.8', 'epoch': '3'}
{'loss

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.319', 'grad_norm': '34.06', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9311', 'eval_accuracy': '0.6894', 'eval_precision_macro': '0.69', 'eval_recall_macro': '0.7194', 'eval_f1_macro': '0.696', 'eval_runtime': '1.115', 'eval_samples_per_second': '788.7', 'eval_steps_per_second': '49.35', 'epoch': '1'}
{'loss': '0.9069', 'grad_norm': '12.28', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.873', 'eval_accuracy': '0.7031', 'eval_precision_macro': '0.6988', 'eval_recall_macro': '0.7339', 'eval_f1_macro': '0.7096', 'eval_runtime': '1.115', 'eval_samples_per_second': '788.6', 'eval_steps_per_second': '49.35', 'epoch': '2'}
{'loss': '0.7663', 'grad_norm': '25.68', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9348', 'eval_accuracy': '0.7179', 'eval_precision_macro': '0.7178', 'eval_recall_macro': '0.7454', 'eval_f1_macro': '0.7232', 'eval_runtime': '1.108', 'eval_samples_per_second': '793.2', 'eval_steps_per_second': '49.63', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9696', 'grad_norm': '49.73', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7839', 'eval_accuracy': '0.7349', 'eval_precision_macro': '0.7357', 'eval_recall_macro': '0.769', 'eval_f1_macro': '0.7418', 'eval_runtime': '1.16', 'eval_samples_per_second': '757.6', 'eval_steps_per_second': '47.41', 'epoch': '1'}
{'loss': '0.6747', 'grad_norm': '15.81', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7116', 'eval_accuracy': '0.7565', 'eval_precision_macro': '0.7551', 'eval_recall_macro': '0.7778', 'eval_f1_macro': '0.763', 'eval_runtime': '1.154', 'eval_samples_per_second': '761.8', 'eval_steps_per_second': '47.67', 'epoch': '2'}
{'loss': '0.5388', 'grad_norm': '31.75', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8139', 'eval_accuracy': '0.7452', 'eval_precision_macro': '0.7441', 'eval_recall_macro': '0.7671', 'eval_f1_macro': '0.7493', 'eval_runtime': '1.117', 'eval_samples_per_second': '787.2', 'eval_steps_per_second': '49.25', 'epoch': '3'

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.582', 'grad_norm': '66.91', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.013', 'eval_accuracy': '0.6348', 'eval_precision_macro': '0.6603', 'eval_recall_macro': '0.6535', 'eval_f1_macro': '0.6369', 'eval_runtime': '1.143', 'eval_samples_per_second': '768.8', 'eval_steps_per_second': '48.1', 'epoch': '1'}
{'loss': '0.9508', 'grad_norm': '56.81', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7392', 'eval_accuracy': '0.7429', 'eval_precision_macro': '0.7492', 'eval_recall_macro': '0.772', 'eval_f1_macro': '0.753', 'eval_runtime': '1.139', 'eval_samples_per_second': '771.6', 'eval_steps_per_second': '48.28', 'epoch': '2'}
{'loss': '0.7696', 'grad_norm': '71.95', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.769', 'eval_accuracy': '0.76', 'eval_precision_macro': '0.7571', 'eval_recall_macro': '0.7938', 'eval_f1_macro': '0.7675', 'eval_runtime': '1.139', 'eval_samples_per_second': '771.6', 'eval_steps_per_second': '48.28', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.275', 'grad_norm': 'inf', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '1.032', 'eval_accuracy': '0.661', 'eval_precision_macro': '0.6651', 'eval_recall_macro': '0.6954', 'eval_f1_macro': '0.6719', 'eval_runtime': '1.153', 'eval_samples_per_second': '762.2', 'eval_steps_per_second': '47.69', 'epoch': '1'}
{'loss': '0.8972', 'grad_norm': '18.43', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9218', 'eval_accuracy': '0.7008', 'eval_precision_macro': '0.697', 'eval_recall_macro': '0.7311', 'eval_f1_macro': '0.7052', 'eval_runtime': '1.125', 'eval_samples_per_second': '781.3', 'eval_steps_per_second': '48.89', 'epoch': '2'}
{'loss': '0.7656', 'grad_norm': '24.32', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9364', 'eval_accuracy': '0.694', 'eval_precision_macro': '0.6924', 'eval_recall_macro': '0.7311', 'eval_f1_macro': '0.7004', 'eval_runtime': '1.109', 'eval_samples_per_second': '792.7', 'eval_steps_per_second': '49.6', 'epoch': '3'}
{'l

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.981', 'grad_norm': '29.41', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7512', 'eval_accuracy': '0.7323', 'eval_precision_macro': '0.742', 'eval_recall_macro': '0.7591', 'eval_f1_macro': '0.7385', 'eval_runtime': '1.139', 'eval_samples_per_second': '771', 'eval_steps_per_second': '48.3', 'epoch': '1'}
{'loss': '0.6811', 'grad_norm': '30.16', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8252', 'eval_accuracy': '0.7426', 'eval_precision_macro': '0.7448', 'eval_recall_macro': '0.7712', 'eval_f1_macro': '0.7477', 'eval_runtime': '1.135', 'eval_samples_per_second': '773.7', 'eval_steps_per_second': '48.46', 'epoch': '2'}
{'loss': '0.5462', 'grad_norm': '22.49', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.747', 'eval_accuracy': '0.7597', 'eval_precision_macro': '0.7625', 'eval_recall_macro': '0.7788', 'eval_f1_macro': '0.7662', 'eval_runtime': '1.151', 'eval_samples_per_second': '762.9', 'eval_steps_per_second': '47.79', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.471', 'grad_norm': '41.53', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8146', 'eval_accuracy': '0.7016', 'eval_precision_macro': '0.7019', 'eval_recall_macro': '0.7222', 'eval_f1_macro': '0.707', 'eval_runtime': '1.146', 'eval_samples_per_second': '765.9', 'eval_steps_per_second': '47.98', 'epoch': '1'}
{'loss': '0.8643', 'grad_norm': '42.25', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7054', 'eval_accuracy': '0.7574', 'eval_precision_macro': '0.7573', 'eval_recall_macro': '0.7814', 'eval_f1_macro': '0.7635', 'eval_runtime': '1.143', 'eval_samples_per_second': '768', 'eval_steps_per_second': '48.11', 'epoch': '2'}
{'loss': '0.7494', 'grad_norm': '17.98', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6695', 'eval_accuracy': '0.7665', 'eval_precision_macro': '0.7656', 'eval_recall_macro': '0.7878', 'eval_f1_macro': '0.7742', 'eval_runtime': '1.145', 'eval_samples_per_second': '766.8', 'eval_steps_per_second': '48.04', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.296', 'grad_norm': '28.48', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9718', 'eval_accuracy': '0.6674', 'eval_precision_macro': '0.6778', 'eval_recall_macro': '0.6936', 'eval_f1_macro': '0.6745', 'eval_runtime': '1.103', 'eval_samples_per_second': '796.2', 'eval_steps_per_second': '49.88', 'epoch': '1'}
{'loss': '0.9032', 'grad_norm': '28.11', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.9113', 'eval_accuracy': '0.7084', 'eval_precision_macro': '0.7105', 'eval_recall_macro': '0.7349', 'eval_f1_macro': '0.7148', 'eval_runtime': '1.104', 'eval_samples_per_second': '795.6', 'eval_steps_per_second': '49.84', 'epoch': '2'}
{'loss': '0.7876', 'grad_norm': '12.82', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8495', 'eval_accuracy': '0.7084', 'eval_precision_macro': '0.712', 'eval_recall_macro': '0.7369', 'eval_f1_macro': '0.7182', 'eval_runtime': '1.112', 'eval_samples_per_second': '789.5', 'eval_steps_per_second': '49.46', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9983', 'grad_norm': '36.58', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.6731', 'eval_accuracy': '0.7699', 'eval_precision_macro': '0.7722', 'eval_recall_macro': '0.7929', 'eval_f1_macro': '0.7781', 'eval_runtime': '1.154', 'eval_samples_per_second': '760.8', 'eval_steps_per_second': '47.66', 'epoch': '1'}
{'loss': '0.6798', 'grad_norm': '11.28', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6874', 'eval_accuracy': '0.7847', 'eval_precision_macro': '0.7984', 'eval_recall_macro': '0.7994', 'eval_f1_macro': '0.7928', 'eval_runtime': '1.161', 'eval_samples_per_second': '756.4', 'eval_steps_per_second': '47.38', 'epoch': '2'}
{'loss': '0.5589', 'grad_norm': '43.1', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.6841', 'eval_accuracy': '0.7961', 'eval_precision_macro': '0.7968', 'eval_recall_macro': '0.8152', 'eval_f1_macro': '0.8033', 'eval_runtime': '1.155', 'eval_samples_per_second': '760.2', 'eval_steps_per_second': '47.62', 'epoch': '

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.451', 'grad_norm': '37.01', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7441', 'eval_accuracy': '0.7585', 'eval_precision_macro': '0.7646', 'eval_recall_macro': '0.7808', 'eval_f1_macro': '0.7689', 'eval_runtime': '1.116', 'eval_samples_per_second': '786.7', 'eval_steps_per_second': '49.28', 'epoch': '1'}
{'loss': '0.8926', 'grad_norm': '34.9', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.6204', 'eval_accuracy': '0.7825', 'eval_precision_macro': '0.7886', 'eval_recall_macro': '0.8097', 'eval_f1_macro': '0.7953', 'eval_runtime': '1.151', 'eval_samples_per_second': '762.8', 'eval_steps_per_second': '47.78', 'epoch': '2'}
{'loss': '0.7418', 'grad_norm': '28.06', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.633', 'eval_accuracy': '0.7733', 'eval_precision_macro': '0.7717', 'eval_recall_macro': '0.8065', 'eval_f1_macro': '0.784', 'eval_runtime': '1.139', 'eval_samples_per_second': '770.7', 'eval_steps_per_second': '48.28', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.31', 'grad_norm': '22.46', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8763', 'eval_accuracy': '0.7141', 'eval_precision_macro': '0.7182', 'eval_recall_macro': '0.7273', 'eval_f1_macro': '0.7177', 'eval_runtime': '1.162', 'eval_samples_per_second': '755.8', 'eval_steps_per_second': '47.35', 'epoch': '1'}
{'loss': '0.9023', 'grad_norm': '26.03', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.8666', 'eval_accuracy': '0.7335', 'eval_precision_macro': '0.7344', 'eval_recall_macro': '0.75', 'eval_f1_macro': '0.7377', 'eval_runtime': '1.114', 'eval_samples_per_second': '788', 'eval_steps_per_second': '49.36', 'epoch': '2'}
{'loss': '0.7786', 'grad_norm': '23.45', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8834', 'eval_accuracy': '0.7175', 'eval_precision_macro': '0.7201', 'eval_recall_macro': '0.7443', 'eval_f1_macro': '0.7237', 'eval_runtime': '1.134', 'eval_samples_per_second': '774.2', 'eval_steps_per_second': '48.49', 'epoch': '3'}
{

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9853', 'grad_norm': '16.95', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.7863', 'eval_accuracy': '0.7335', 'eval_precision_macro': '0.7319', 'eval_recall_macro': '0.7602', 'eval_f1_macro': '0.741', 'eval_runtime': '1.137', 'eval_samples_per_second': '772', 'eval_steps_per_second': '48.36', 'epoch': '1'}
{'loss': '0.6622', 'grad_norm': '7.977', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7973', 'eval_accuracy': '0.7563', 'eval_precision_macro': '0.7566', 'eval_recall_macro': '0.7828', 'eval_f1_macro': '0.7657', 'eval_runtime': '1.152', 'eval_samples_per_second': '761.9', 'eval_steps_per_second': '47.73', 'epoch': '2'}
{'loss': '0.542', 'grad_norm': '5.79', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.8104', 'eval_accuracy': '0.7437', 'eval_precision_macro': '0.7488', 'eval_recall_macro': '0.7691', 'eval_f1_macro': '0.7547', 'eval_runtime': '1.147', 'eval_samples_per_second': '765.8', 'eval_steps_per_second': '47.97', 'epoch': '3'}


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.516', 'grad_norm': '39.54', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9449', 'eval_accuracy': '0.6834', 'eval_precision_macro': '0.6768', 'eval_recall_macro': '0.7045', 'eval_f1_macro': '0.6839', 'eval_runtime': '1.165', 'eval_samples_per_second': '753.5', 'eval_steps_per_second': '47.2', 'epoch': '1'}
{'loss': '0.9243', 'grad_norm': '28.08', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '0.7806', 'eval_accuracy': '0.7517', 'eval_precision_macro': '0.7502', 'eval_recall_macro': '0.7797', 'eval_f1_macro': '0.7601', 'eval_runtime': '1.163', 'eval_samples_per_second': '754.9', 'eval_steps_per_second': '47.29', 'epoch': '2'}
{'loss': '0.7673', 'grad_norm': '146.7', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.7481', 'eval_accuracy': '0.7506', 'eval_precision_macro': '0.7502', 'eval_recall_macro': '0.7829', 'eval_f1_macro': '0.7599', 'eval_runtime': '1.135', 'eval_samples_per_second': '773.8', 'eval_steps_per_second': '48.48', 'epoch': '3

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '1.257', 'grad_norm': '26.67', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.9984', 'eval_accuracy': '0.6743', 'eval_precision_macro': '0.6719', 'eval_recall_macro': '0.6967', 'eval_f1_macro': '0.6812', 'eval_runtime': '1.1', 'eval_samples_per_second': '798.5', 'eval_steps_per_second': '50.02', 'epoch': '1'}
{'loss': '0.8961', 'grad_norm': '10.81', 'learning_rate': '1.8e-05', 'epoch': '2'}
{'eval_loss': '1.029', 'eval_accuracy': '0.6868', 'eval_precision_macro': '0.6851', 'eval_recall_macro': '0.7059', 'eval_f1_macro': '0.6921', 'eval_runtime': '1.158', 'eval_samples_per_second': '758.4', 'eval_steps_per_second': '47.51', 'epoch': '2'}
{'loss': '0.7705', 'grad_norm': '8.491', 'learning_rate': '1.7e-05', 'epoch': '3'}
{'eval_loss': '0.9304', 'eval_accuracy': '0.6879', 'eval_precision_macro': '0.6919', 'eval_recall_macro': '0.7011', 'eval_f1_macro': '0.6926', 'eval_runtime': '1.116', 'eval_samples_per_second': '786.5', 'eval_steps_per_second': '49.27', 'epoch': '3'}

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

{'loss': '0.9745', 'grad_norm': '20.56', 'learning_rate': '1.9e-05', 'epoch': '1'}
{'eval_loss': '0.8036', 'eval_accuracy': '0.7403', 'eval_precision_macro': '0.7417', 'eval_recall_macro': '0.7707', 'eval_f1_macro': '0.7491', 'eval_runtime': '1.141', 'eval_samples_per_second': '769.5', 'eval_steps_per_second': '48.2', 'epoch': '1'}


RuntimeError: [enforce fail at inline_container.cc:664] . unexpected pos 329289728 vs 329289620