# CONFIGURATION

In [None]:
import os

# Candidate Hugging Face checkpoints; Config.MODEL_NAME selects one of them by index.
# The trailing notes record which checkpoints are already done and the learning rate used.
MODEL_NAMES = [
    "joeddav/xlm-roberta-large-xnli",  # (done) lr: 6e-06
    "microsoft/infoxlm-large",  # (done) lr: 6e-6
    "uitnlp/CafeBERT",  # (done) lr: 2e-5
    "FacebookAI/xlm-roberta-large",  # (done) lr: 6e-6
    "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",  # (done) lr: 6e-6
    "MoritzLaurer/ernie-m-large-mnli-xnli",
    "microsoft/deberta-xlarge-mnli",
]


class Config:
    """Static settings for the fine-tuning run: paths, model choice, tokenizer,
    training hyper-parameters and the label mapping.

    Every value is a class attribute evaluated once, when the class body runs,
    so ROOT_DIR is the working directory at that moment.
    """

    ROOT_DIR = os.getcwd()
    # --- Paths and file names ---
    DATA_DIR = os.path.join(ROOT_DIR, "data")
    TRAIN_FILE = os.path.join(DATA_DIR, "vihallu-train.csv")
    TEST_FILE = os.path.join(DATA_DIR, "vihallu-public-test.csv")

    SUBMISSION_DIR = os.path.join(ROOT_DIR, "submission")
    SUBMISSION_CSV = "submit.csv"
    SUBMISSION_ZIP = "submit.zip"

    MODEL_NAME = MODEL_NAMES[5]  # change the index to select a model
    # Fine-tuned weights go to models/<last path component of MODEL_NAME>-tuned
    MODEL_OUTPUT_DIR = os.path.join(
        ROOT_DIR, "models", f"{MODEL_NAME.split('/')[-1]}-tuned"
    )

    # --- Tokenizer settings ---
    MAX_LENGTH = 512

    # --- Training settings ---
    RANDOM_STATE = 42
    EPOCHS = 10
    BATCH_SIZE = 2
    GRADIENT_ACCUMULATION_STEPS = 8  # raise gradually to fit the GPU VRAM

    SCHEDULER_TYPE = "cosine"  # "linear"  # pick a suitable scheduler
    LEARNING_RATE = 1e-5
    WEIGHT_DECAY = 0.03
    CLASSIFIER_DROPOUT = 0.02
    EPSILON = 1e-8
    PATIENCE_LIMIT = 2
    # Warm-up amount: the scheduler cell treats values <= 1 as a fraction of the
    # total optimizer steps and larger values as an absolute step count.
    TOTAL_STEP_SCALE = 0.1
    LABEL_SMOOTHING = 0.05  # added to regularize and avoid overfitting
    VALIDATION_SPLIT_SIZE = 0.2

    # --- Label mapping ---
    # NOTE(review): the inline comment below assumes the checkpoint head is ordered
    # contradiction/neutral/entailment, but the config printed in this notebook for
    # MoritzLaurer/ernie-m-large-mnli-xnli shows id2label 0=entailment, 1=neutral,
    # 2=contradiction. Confirm the ordering per model before relying on the
    # pretrained NLI head.
    LABEL_MAP = {
        "intrinsic": 0,
        "extrinsic": 1,
        "no": 2,
    }  # contradiction/neutral/entailment
    ID2LABEL = {v: k for k, v in LABEL_MAP.items()}

    # << ADDED LINE (use the numbers computed during EDA)
    CLASS_WEIGHTS = [1.0393466963622866, 1.0114145354717525, 0.9531590413943355]
    # CLASS_WEIGHTS = None


# Shared settings object used by the cells below.
cfg = Config()


# LOGGER

In [None]:
import logging
import os
from datetime import datetime

# Th∆∞ m·ª•c g·ªëc ƒë·ªÉ l∆∞u t·∫•t c·∫£ c√°c file log
LOG_BASE_DIR = "logs"  # setup_logger creates one sub-directory per model beneath this folder

# Cache of loggers that were already built, so they are not rebuilt and do not emit duplicate log lines
_loggers = {}


def setup_logger(model_name: str, log_level=logging.INFO):
    """
    Create (or fetch from the cache) a logger that writes to both the console and a file.

    - Each model gets its own log directory derived from `model_name`.
    - Each fresh setup opens a log file named after the current timestamp
      (e.g. 2023-10-27_15-30-00.log).
    - Handlers left on the underlying `logging` logger by an earlier setup are
      removed and closed first. Without this, re-running the setup after the
      module-level cache was reset (for example by re-executing the defining
      notebook cell) would attach a second pair of handlers and duplicate
      every log line.

    Args:
        model_name (str): Model name used for the sub-directory, e.g. 'xnli-large-tuned'.
        log_level (int): Logging level; defaults to logging.INFO.

    Returns:
        logging.Logger: The configured logger instance.
    """
    # A logger already built for this model is returned unchanged
    if model_name in _loggers:
        return _loggers[model_name]

    # Make the model name safe to use as a directory name (replace path separators)
    safe_model_name = model_name.replace("/", "_").replace("\\", "_")
    model_log_dir = os.path.join(LOG_BASE_DIR, safe_model_name)
    os.makedirs(model_log_dir, exist_ok=True)

    logger = logging.getLogger(safe_model_name)
    logger.setLevel(log_level)

    # Do not propagate to the root logger, otherwise messages reach the console twice
    logger.propagate = False

    # logging.getLogger hands back the same object for the same name, regardless of
    # what _loggers contains, so drop whatever an earlier setup attached.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter(
        "%(asctime)s - [%(levelname)s] - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )

    # File handler: one timestamped file per setup
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    log_file_path = os.path.join(model_log_dir, f"{timestamp}.log")

    file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
    file_handler.setLevel(log_level)
    file_handler.setFormatter(formatter)

    # Console (stream) handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    # Remember the logger so later calls with the same name reuse it
    _loggers[model_name] = logger

    logger.info(
        f"Logger cho '{safe_model_name}' ƒë√£ ƒë∆∞·ª£c kh·ªüi t·∫°o. File log: {log_file_path}"
    )

    return logger


## Setup logger

In [3]:
# Build the run logger, then record a banner followed by every public,
# non-callable attribute declared on Config.
logger = setup_logger(cfg.MODEL_NAME + "-training")
logger.info("Logger initialized for %s", cfg.MODEL_NAME)

banner_rule = "=" * 60
logger.info(banner_rule)
logger.info("üöÄ STARTING TRAINING SESSION")
logger.info(banner_rule)
for key, value in vars(Config).items():
    if key.startswith("__") or callable(value):
        continue
    logger.info("%s: %s", key, value)
logger.info(banner_rule)


2025-10-16 15:20:51 - [INFO] - Logger cho 'MoritzLaurer_ernie-m-large-mnli-xnli-training' ƒë√£ ƒë∆∞·ª£c kh·ªüi t·∫°o. File log: logs/MoritzLaurer_ernie-m-large-mnli-xnli-training/2025-10-16_15-20-51.log
2025-10-16 15:20:51 - [INFO] - Logger initialized for MoritzLaurer/ernie-m-large-mnli-xnli
2025-10-16 15:20:51 - [INFO] - üöÄ STARTING TRAINING SESSION
2025-10-16 15:20:51 - [INFO] - ROOT_DIR: /home/guest/Projects/CS221
2025-10-16 15:20:51 - [INFO] - DATA_DIR: /home/guest/Projects/CS221/data
2025-10-16 15:20:51 - [INFO] - TRAIN_FILE: /home/guest/Projects/CS221/data/vihallu-train.csv
2025-10-16 15:20:51 - [INFO] - TEST_FILE: /home/guest/Projects/CS221/data/vihallu-public-test.csv
2025-10-16 15:20:51 - [INFO] - SUBMISSION_DIR: /home/guest/Projects/CS221/submission
2025-10-16 15:20:51 - [INFO] - SUBMISSION_CSV: submit.csv
2025-10-16 15:20:51 - [INFO] - SUBMISSION_ZIP: submit.zip
2025-10-16 15:20:51 - [INFO] - MODEL_NAME: MoritzLaurer/ernie-m-large-mnli-xnli
2025-10-16 15:20:51 - [INFO] - 

# Hallucination Dataset

In [None]:
import torch
from torch.utils.data import Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
import os


# Nhi·ªám v·ª• c·ªßa n√≥ b√¢y gi·ªù ch·ªâ l√† l·∫•y ra c√°c item ƒë√£ ƒë∆∞·ª£c x·ª≠ l√Ω s·∫µn
class PreTokenizedDataset(Dataset):
    """Dataset over inputs that were tokenized ahead of time.

    `encodings` is a mapping whose values are indexed by sample position
    (for example key 'input_ids' holding the ids of every sample), and
    `labels` is the parallel sequence of labels. Nothing is tokenized here;
    items are only looked up.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The label sequence defines how many samples there are.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample `idx`: one entry per encoding field plus a torch.long `labels` tensor."""
        sample = {}
        for field, values in self.encodings.items():
            # values[idx] is that field's data for the requested sample
            sample[field] = values[idx]

        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample


## Prepare data

In [None]:
def prepare_data(config, logger=None):
    """
    Read the training CSV, build the NLI-style model input, split it into
    train/validation sets and SAVE both splits to disk.

    Returns (train_df, val_df), or (None, None) when the CSV file is not found.
    """
    # Use TRAIN_FILE as given when it is absolute or already resolves; otherwise
    # look for it inside DATA_DIR.
    configured_path = getattr(config, "TRAIN_FILE", "")
    usable_as_is = os.path.isabs(configured_path) or os.path.exists(configured_path)
    data_path = (
        config.TRAIN_FILE
        if usable_as_is
        else os.path.join(config.DATA_DIR, config.TRAIN_FILE)
    )

    try:
        df = pd.read_csv(data_path)
    except FileNotFoundError:
        print(f"‚ùå L·ªói: Kh√¥ng t√¨m th·∫•y file d·ªØ li·ªáu t·∫°i {data_path}")
        return None, None
    print(f"‚úÖ ƒê·ªçc th√†nh c√¥ng {len(df)} m·∫´u t·ª´ {data_path}")

    # Cast the text columns to str so NaN cells do not break the concatenation below
    for column in ("context", "prompt", "response"):
        df[column] = df[column].astype(str)

    # Standard NLI layout, premise first: [premise] </s></s> [hypothesis].
    # The premise joins the question with its context (the source of truth);
    # the response is the hypothesis to be checked against it.
    question_part = "C√¢u h·ªèi: " + df["prompt"]
    premise = question_part + " Ng·ªØ c·∫£nh: " + df["context"]
    df["input_text"] = premise + " </s></s> " + df["response"]

    # Show the first example so the format can be checked by eye
    print("\n=== KI·ªÇM TRA FORMAT D·ªÆ LI·ªÜU M·ªöI ===")
    sample = df["input_text"].iloc[0]
    print(f"M·∫´u input: {sample}...")

    # Map textual labels to ids; rows whose label is not in LABEL_MAP become
    # NaN and are dropped before the cast to int.
    df["label_id"] = df["label"].map(config.LABEL_MAP)
    df.dropna(subset=["label_id"], inplace=True)
    df["label_id"] = df["label_id"].astype(int)

    # Stratified split keeps the label proportions in both parts
    train_df, val_df = train_test_split(
        df,
        stratify=df["label_id"],
        test_size=config.VALIDATION_SPLIT_SIZE,
        random_state=config.RANDOM_STATE,
    )

    if logger:
        logger.info(
            f"Chia d·ªØ li·ªáu: {len(train_df)} m·∫´u train, {len(val_df)} m·∫´u validation."
        )

    # Persist both splits under <DATA_DIR>/processed, creating the folder if needed
    processed_data_dir = os.path.join(config.DATA_DIR, "processed")
    os.makedirs(processed_data_dir, exist_ok=True)
    train_output_path = os.path.join(processed_data_dir, "train_split.csv")
    val_output_path = os.path.join(processed_data_dir, "validation_split.csv")

    for frame, destination in (
        (train_df, train_output_path),
        (val_df, val_output_path),
    ):
        frame.to_csv(destination, index=False, encoding="utf-8-sig")

    print(f"‚úÖ ƒê√£ l∆∞u t·∫≠p train v√†o: {train_output_path}")
    print(f"‚úÖ ƒê√£ l∆∞u t·∫≠p validation v√†o: {val_output_path}")

    return train_df, val_df


# Model

In [None]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
import torch.nn as nn


def get_model_and_tokenizer(config):
    """Load the pretrained sequence-classification model and its tokenizer.

    Args:
        config: settings object providing MODEL_NAME and LABEL_MAP, and
            optionally CLASSIFIER_DROPOUT.

    Returns:
        tuple: (model, tokenizer).

    When CLASSIFIER_DROPOUT is present and not None, it is written to the
    dropout fields of ``model.config`` and to every ``nn.Dropout`` module of
    the model. A missing or ``None`` value leaves the checkpoint's dropout
    untouched (a ``None`` value used to raise TypeError inside ``max``).
    """
    print(f"ƒêang t·∫£i model: {config.MODEL_NAME}")
    tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)

    # This config object is loaded only for the printout below.
    # NOTE(review): trust_remote_code=True is passed here but not to the
    # tokenizer or model loads; confirm whether custom-code checkpoints are
    # meant to be supported, and drop the flag if they are not.
    pretrained_config = AutoConfig.from_pretrained(
        config.MODEL_NAME, trust_remote_code=True
    )
    print(f"Model config: {pretrained_config}")

    model = AutoModelForSequenceClassification.from_pretrained(
        config.MODEL_NAME, num_labels=len(config.LABEL_MAP)
    )

    dropout = getattr(config, "CLASSIFIER_DROPOUT", None)
    if dropout is None:
        # No override requested: keep the dropout rates shipped with the checkpoint.
        return model, tokenizer

    # Record the override on the model config for the fields it actually has.
    if hasattr(model.config, "classifier_dropout"):
        model.config.classifier_dropout = dropout
    if hasattr(model.config, "hidden_dropout_prob"):
        model.config.hidden_dropout_prob = dropout
    if hasattr(model.config, "attention_probs_dropout_prob"):
        # The recorded attention dropout is clamped to [0.1, 0.15].
        model.config.attention_probs_dropout_prob = min(0.15, max(0.1, dropout))

    # Set the rate on every existing nn.Dropout module.
    # NOTE(review): this loop also covers attention dropout modules, so the
    # clamped config value above and the module rate can differ (the
    # architecture printed in this notebook shows p=0.02 everywhere) — confirm
    # which of the two is intended.
    for module in model.modules():
        if isinstance(module, nn.Dropout):
            module.p = dropout
    return model, tokenizer


# Training

In [None]:
import os
import math
import torch
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from tqdm.auto import tqdm
from torch.optim import AdamW
from huggingface_hub import login
from transformers import get_scheduler
from torch.utils.data import DataLoader
from tqdm.contrib.logging import logging_redirect_tqdm
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, accuracy_score, classification_report
from functools import partial


## train one epoch function

In [None]:
def train_one_epoch(
    model,
    data_loader,
    loss_fn,
    optimizer,
    scheduler,
    device,
    epoch=None,
    total_epochs=None,
    gradient_accumulation_steps=1,
):
    """Train the model for one epoch using gradient accumulation.

    Gradients of `gradient_accumulation_steps` consecutive batches are summed
    before each optimizer/scheduler step. Each batch loss is divided by the
    number of batches in its accumulation group, so the trailing group of an
    epoch, which may be shorter, is averaged over its real size instead of
    being under-scaled.

    Args:
        model: module called as ``model(input_ids, attention_mask=...)`` whose
            output exposes ``.logits``.
        data_loader: sized iterable of batches with keys ``input_ids``,
            ``attention_mask`` and ``labels``.
        loss_fn: callable ``loss_fn(logits, labels)`` returning a scalar tensor.
        optimizer: optimizer stepped once per accumulation group.
        scheduler: learning-rate scheduler stepped together with the optimizer.
        device: device the batch tensors are moved to.
        epoch, total_epochs: only used for the progress-bar caption.
        gradient_accumulation_steps: batches per optimizer step; values below 1
            are treated as 1.

    Returns:
        float: mean of the unscaled per-batch losses over the epoch.
    """
    model.train()
    total_loss = 0
    desc = "Train" if epoch is None else f"Epoch {epoch}/{total_epochs}"
    progress_bar = tqdm(
        data_loader, desc=desc, leave=False, dynamic_ncols=True, mininterval=0.5
    )

    gradient_accumulation_steps = max(1, gradient_accumulation_steps)
    steps_in_epoch = len(data_loader)
    # Size of the last, possibly shorter, accumulation group and the index of its
    # first batch. With no leftover, trailing_start equals steps_in_epoch and is
    # therefore never reached.
    trailing_size = steps_in_epoch % gradient_accumulation_steps
    trailing_start = steps_in_epoch - trailing_size

    optimizer.zero_grad()
    with logging_redirect_tqdm():  # keep logger output from breaking the progress bar
        for step, batch in enumerate(progress_bar):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            loss = loss_fn(logits, labels)
            batch_loss = loss.item()
            total_loss += batch_loss

            group_size = (
                trailing_size
                if step >= trailing_start
                else gradient_accumulation_steps
            )
            (loss / group_size).backward()

            group_complete = (step + 1) % gradient_accumulation_steps == 0
            if group_complete or (step + 1) == steps_in_epoch:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

            progress_bar.set_postfix({"loss": f"{batch_loss:.4f}"})

    return total_loss / len(data_loader)


## Evaluate

In [None]:
def evaluate(model, data_loader, loss_fn, device):
    """Evaluate the model on a data loader without tracking gradients.

    Returns a tuple (all_labels, all_preds, avg_val_loss): the gold labels and
    argmax predictions gathered batch by batch, and the sum of the batch losses
    divided by the number of batches.
    """
    model.eval()
    gold_labels, predictions = [], []
    loss_sum = 0

    batches = tqdm(data_loader, desc="Evaluating", leave=False, dynamic_ncols=True)

    with torch.no_grad(), logging_redirect_tqdm():
        for batch in batches:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            logits = model(input_ids, attention_mask=attention_mask).logits

            # Loss on this validation batch
            loss_sum += loss_fn(logits, labels).item()

            batch_preds = logits.argmax(dim=-1)
            predictions.extend(batch_preds.cpu().numpy())
            gold_labels.extend(labels.cpu().numpy())

    mean_loss = loss_sum / len(data_loader)
    return gold_labels, predictions, mean_loss


# Main

In [10]:
# Load environment variables from envs/.env under the current working directory.
dotenv_path = os.path.join(os.getcwd(), "envs", ".env")
load_dotenv(dotenv_path=dotenv_path)
print("dotenv_path:", dotenv_path)


dotenv_path: /home/guest/Projects/CS221/envs/.env


In [11]:
# Read the Hugging Face token from the environment and log in when it is set.
hf_token = os.environ.get("HUGGING_FACE_TOKEN")

if not hf_token:
    print(
        "WARNING: Kh√¥ng t√¨m th·∫•y HUGGING_FACE_TOKEN trong file .env. M·ªôt s·ªë model c√≥ th·ªÉ y√™u c·∫ßu ƒëƒÉng nh·∫≠p."
    )
else:
    print("INFO: T√¨m th·∫•y HUGGING_FACE_TOKEN. ƒêang ƒëƒÉng nh·∫≠p...")
    login(token=hf_token)
    print("INFO: ƒêƒÉng nh·∫≠p Hugging Face th√†nh c√¥ng.")


INFO: T√¨m th·∫•y HUGGING_FACE_TOKEN. ƒêang ƒëƒÉng nh·∫≠p...
INFO: ƒêƒÉng nh·∫≠p Hugging Face th√†nh c√¥ng.


## 1. Chuẩn bị dữ liệu

In [12]:
logger.info("B·∫Øt ƒë·∫ßu pipeline hu·∫•n luy·ªán.")

# Step 1: prepare the data
logger.info("B∆∞·ªõc 1: Chu·∫©n b·ªã d·ªØ li·ªáu...")
train_df, val_df = prepare_data(cfg, logger=logger)
if train_df is None:
    logger.error("D·ªØ li·ªáu kh√¥ng th·ªÉ ƒë∆∞·ª£c chu·∫©n b·ªã. D·ª´ng ch∆∞∆°ng tr√¨nh.")
    # The message above announces a stop, so actually stop here instead of
    # letting later cells fail on train_df being None.
    raise RuntimeError(
        "prepare_data() returned no data; aborting the training pipeline."
    )


2025-10-16 15:20:53 - [INFO] - B·∫Øt ƒë·∫ßu pipeline hu·∫•n luy·ªán.
2025-10-16 15:20:53 - [INFO] - B∆∞·ªõc 1: Chu·∫©n b·ªã d·ªØ li·ªáu...
2025-10-16 15:20:53 - [INFO] - Chia d·ªØ li·ªáu: 5600 m·∫´u train, 1400 m·∫´u validation.


‚úÖ ƒê·ªçc th√†nh c√¥ng 7000 m·∫´u t·ª´ /home/guest/Projects/CS221/data/vihallu-train.csv

=== KI·ªÇM TRA FORMAT D·ªÆ LI·ªÜU M·ªöI ===
M·∫´u input: C√¢u h·ªèi: V√†o nh·ªØng nƒÉm 1960, n∆°i n√†o tr·ªü th√†nh trung t√¢m c·ªßa th·∫ø h·ªá √¢m nh·∫°c beat v√† folk, m·∫∑c d√π Jackson Pollock v√† Willem de Kooning ƒë√£ t·ªï ch·ª©c nhi·ªÅu bu·ªïi tri·ªÉn l√£m √¢m nh·∫°c n·ªïi ti·∫øng t·∫°i ƒë√≥? Ng·ªØ c·∫£nh: V√†o nh·ªØng nƒÉm 1870, hai nh√† ƒëi√™u kh·∫Øc Augustus Saint-Gaudens v√† Daniel Chester French sinh s·ªëng v√† l√†m vi·ªác g·∫ßn Qu·∫£ng tr∆∞·ªùng. ƒê·∫øn nh·ªØng nƒÉm 1920, C√¥ng vi√™n Qu·∫£ng tr∆∞·ªùng Washington ƒë∆∞·ª£c c√¥ng nh·∫≠n c·∫•p qu·ªëc gia l√† m·ªôt trung t√¢m c·ªßa phong tr√†o n·ªïi lo·∫°n v·ªÅ ngh·ªá thu·∫≠t v√† ƒë·∫°o ƒë·ª©c. Do ƒë√≥, khu√¥n vi√™n NYU t·∫°i Qu·∫£ng tr∆∞·ªùng Washington tr·ªü n√™n ƒëa d·∫°ng v√† h·ªëi h·∫£ nh·ªù c√≥ nƒÉng l∆∞·ª£ng c·ªßa cu·ªôc s·ªëng ƒë√¥ th·ªã, ƒëi·ªÅu n√†y ƒë√£ d·∫´n ƒë·∫øn nh·ªØng s·ª± thay ƒë·ªïi v·ªÅ m·∫∑t h·ªçc thu·∫≠t ·ªü NYU. Nh·ª

## 2. Tải model và tokenizer

In [13]:
# Step 2: load the pretrained model and tokenizer selected in the config.
logger.info("B∆∞·ªõc 2: T·∫£i model '%s' v√† tokenizer...", cfg.MODEL_NAME)
model, tokenizer = get_model_and_tokenizer(cfg)


2025-10-16 15:20:53 - [INFO] - B∆∞·ªõc 2: T·∫£i model 'MoritzLaurer/ernie-m-large-mnli-xnli' v√† tokenizer...


ƒêang t·∫£i model: MoritzLaurer/ernie-m-large-mnli-xnli
Model config: ErnieMConfig {
  "act_dropout": 0.0,
  "architectures": [
    "ErnieMForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "dtype": "float32",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "entailment",
    "1": "neutral",
    "2": "contradiction"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "contradiction": 2,
    "entailment": 0,
    "neutral": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "ernie_m",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "paddlenlp_version": null,
  "problem_type": "single_label_classification",
  "transformers_version": "4.57.1",
  "type_vocab_size": 16,
  "vocab_size": 250002
}



In [14]:
# %pip install torchinfo


In [15]:
from torchinfo import summary
import torch  # ƒê·∫£m b·∫£o ƒë√£ import torch

logger.info("Ph√¢n t√≠ch ki·∫øn tr√∫c m√¥ h√¨nh b·∫±ng torchinfo...")

# Dummy batch of random token ids shaped (BATCH_SIZE, MAX_LENGTH), i.e. the
# same shape the training batches use.
dummy_shape = (cfg.BATCH_SIZE, cfg.MAX_LENGTH)
input_ids_example = torch.randint(low=0, high=tokenizer.vocab_size, size=dummy_shape)

# verbose=0 stops torchinfo from printing by itself; the summary object is
# rendered through the logger instead.
model_summary = summary(
    model,
    input_data={"input_ids": input_ids_example},
    col_names=["input_size", "output_size", "num_params", "mult_adds"],
    verbose=0,
)

logger.info("Ki·∫øn tr√∫c chi ti·∫øt c·ªßa m√¥ h√¨nh:\n%s", model_summary)


# # (T√πy ch·ªçn) B·∫°n v·∫´n c√≥ th·ªÉ in ra m√†n h√¨nh n·∫øu mu·ªën xem ngay trong notebook
# print("In summary ra m√†n h√¨nh notebook:")
# print(model_summary)


2025-10-16 15:20:55 - [INFO] - Ph√¢n t√≠ch ki·∫øn tr√∫c m√¥ h√¨nh b·∫±ng torchinfo...
2025-10-16 15:20:57 - [INFO] - Ki·∫øn tr√∫c chi ti·∫øt c·ªßa m√¥ h√¨nh:
Layer (type:depth-idx)                                       Input Shape               Output Shape              Param #                   Mult-Adds
ErnieMForSequenceClassification                              --                        [2, 3]                    --                        --
‚îú‚îÄErnieMModel: 1-1                                           [2, 512]                  [2, 1024]                 --                        --
‚îÇ    ‚îî‚îÄErnieMEmbeddings: 2-1                                 --                        [2, 512, 1024]            --                        --
‚îÇ    ‚îÇ    ‚îî‚îÄEmbedding: 3-1                                   [2, 512]                  [2, 512, 1024]            256,002,048               512,004,096
‚îÇ    ‚îÇ    ‚îî‚îÄEmbedding: 3-2                                   [2, 512]                  [2,

## 3. Tạo Dataset và DataLoader

In [16]:
# Cell [15] - PHI√äN B·∫¢N HO√ÄN CH·ªàNH
from torch.utils.data import DataLoader

logger.info("B∆∞·ªõc 3: B·∫Øt ƒë·∫ßu x·ª≠ l√Ω v√† t·∫°o DataLoader v·ªõi Static Padding...")

# --- 3.1: Pull the input sentences and label ids out of both splits ---
train_texts = train_df["input_text"].tolist()
val_texts = val_df["input_text"].tolist()
train_labels = train_df["label_id"].tolist()
val_labels = val_df["label_id"].tolist()

# --- 3.2: Tokenize, pad to MAX_LENGTH and return PyTorch tensors in one call ---
# NOTE(review): every sample is one string with the response appended last, so
# if the tokenizer truncates from the end, long contexts could lose the response.
# Confirm the truncation side, or consider encoding premise/response as a pair.
# NOTE(review): the decoded sample printed later in this notebook starts with
# [CLS], so the literal "</s></s>" separator may not be a special token for
# every checkpoint — verify per tokenizer.
static_padding_kwargs = {
    "max_length": cfg.MAX_LENGTH,
    "truncation": True,
    "padding": "max_length",
    "return_attention_mask": True,
    "return_tensors": "pt",
}

logger.info("Tokenizing v√† Padding to√†n b·ªô t·∫≠p train...")
train_encodings = tokenizer(train_texts, **static_padding_kwargs)

logger.info("Tokenizing v√† Padding to√†n b·ªô t·∫≠p validation...")
val_encodings = tokenizer(val_texts, **static_padding_kwargs)

# --- 3.3: Wrap the pre-tokenized tensors in datasets ---
logger.info("T·∫°o Dataset...")
train_dataset = PreTokenizedDataset(train_encodings, train_labels)
val_dataset = PreTokenizedDataset(val_encodings, val_labels)

# --- 3.4: DataLoaders (no collate_fn: every sample already has the same length) ---
logger.info("T·∫°o DataLoader...")
train_loader = DataLoader(train_dataset, batch_size=cfg.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=cfg.BATCH_SIZE, shuffle=False)

logger.info("‚úÖ T·∫°o DataLoader th√†nh c√¥ng!")


2025-10-16 15:20:57 - [INFO] - B∆∞·ªõc 3: B·∫Øt ƒë·∫ßu x·ª≠ l√Ω v√† t·∫°o DataLoader v·ªõi Static Padding...
2025-10-16 15:20:57 - [INFO] - Tokenizing v√† Padding to√†n b·ªô t·∫≠p train...
2025-10-16 15:20:57 - [INFO] - Tokenizing v√† Padding to√†n b·ªô t·∫≠p train...
2025-10-16 15:21:01 - [INFO] - Tokenizing v√† Padding to√†n b·ªô t·∫≠p validation...
2025-10-16 15:21:01 - [INFO] - T·∫°o Dataset...
2025-10-16 15:21:01 - [INFO] - T·∫°o DataLoader...
2025-10-16 15:21:01 - [INFO] - ‚úÖ T·∫°o DataLoader th√†nh c√¥ng!


In [17]:
# Accumulate over at least one step; the effective batch size is what each
# optimizer update sees.
gradient_accumulation_steps = max(1, cfg.GRADIENT_ACCUMULATION_STEPS)
effective_batch_size = gradient_accumulation_steps * cfg.BATCH_SIZE
logger.info(
    f"Gradient accumulation steps: {gradient_accumulation_steps}"
    f" | Effective batch size: {effective_batch_size}"
)


2025-10-16 15:21:02 - [INFO] - Gradient accumulation steps: 8 | Effective batch size: 16


### Check dataset

In [18]:
# Sanity check: pull one training batch, print its tensor sizes and labels,
# then decode its first sample (special tokens included) back to text.
print("\n--- Ki·ªÉm tra 1 batch d·ªØ li·ªáu ƒë·∫ßu v√†o ---")
sample_batch = next(iter(train_loader))
for field in ("input_ids", "attention_mask"):
    print(f"K√≠ch th∆∞·ªõc {field}:", sample_batch[field].shape)
print("Nh√£n trong batch:", sample_batch["labels"])

first_sample_ids = sample_batch["input_ids"][0]
decoded_text = tokenizer.decode(first_sample_ids, skip_special_tokens=False)
print("\nM·ªôt m·∫´u ƒë√£ ƒë∆∞·ª£c token h√≥a v√† gi·∫£i m√£ l·∫°i:")
print(decoded_text)
print("------------------------------------------\n")
# --- end of the sanity check ---



--- Ki·ªÉm tra 1 batch d·ªØ li·ªáu ƒë·∫ßu v√†o ---
K√≠ch th∆∞·ªõc input_ids: torch.Size([2, 512])
K√≠ch th∆∞·ªõc attention_mask: torch.Size([2, 512])
Nh√£n trong batch: tensor([2, 1])

M·ªôt m·∫´u ƒë√£ ƒë∆∞·ª£c token h√≥a v√† gi·∫£i m√£ l·∫°i:
[CLS] C√¢u h·ªèi: Cu·ªôc c√°ch m·∫°ng Baal Teshuva, v·ªën kh√¥ng c√≥ t√°c ƒë·ªông nh√¢n kh·∫©u h·ªçc n√†o ƒë√°ng k·ªÉ, ƒë√£ l√†m cho nh·ªØng ng∆∞·ªùi Do Th√°i th·∫ø t·ª•c tr·ªü th√†nh nh·ªØng ng∆∞·ªùi Do Th√°i s√πng ƒë·∫°o nh∆∞ th·∫ø n√†o? Ng·ªØ c·∫£nh: C≈©ng c√≥ m·ªôt xu h∆∞·ªõng c·ªßa c√°c cu·ªôc c√°ch m·∫°ng ƒë∆∞·ª£c kh·ªüi x∆∞·ªõng t·ª´ ph√°i Do Th√°i gi√°o Ch√≠nh th·ªëng ƒë·ªÉ h·ªó tr·ª£ v√† gi√∫p ƒë·ª° nh·ªØng ng∆∞·ªùi Do Th√°i th·∫ø t·ª•c h∆∞·ªõng v·ªÅ m·ªôt b·∫£n s·∫Øc Do Th√°i truy·ªÅn th·ªëng ƒë·∫≠m ƒë√† b·∫£n s·∫Øc d√¢n t·ªôc Do Th√°i ƒë·ªÉ gi·∫£m thi·ªÉu t·ª∑ l·ªá k·∫øt h√¥n kh√°c ch·ªßng t·ªôc v√† vi·ªác k·∫øt h√¥n kh√°c t√¥n gi√°o. Nh·ªù nh·ªØng n·ªó l·ª±c c·ªßa c√°c cu·ªôc c√°ch m·∫°ng ·∫•y c√πng v·ªõi c√°c nh√≥m Do Th√°i kh√°c, t

## 4. Thiết lập Huấn luyện

In [19]:
import torch  # ƒê·∫£m b·∫£o ƒë√£ import torch

logger.info("B∆∞·ªõc 4: Thi·∫øt l·∫≠p m√¥i tr∆∞·ªùng hu·∫•n luy·ªán v√† ki·∫øn tr√∫c model...")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# --- Log which device (GPU/CPU) is in use ---
logger.info("S·ª≠ d·ª•ng thi·∫øt b·ªã: %s", device)
if device.type != "cuda":
    logger.warning("‚ö†Ô∏è Kh√¥ng t√¨m th·∫•y GPU, s·ª≠ d·ª•ng CPU. Qu√° tr√¨nh training s·∫Ω r·∫•t ch·∫≠m.")
else:
    gpu_count = torch.cuda.device_count()
    gpu_name = torch.cuda.get_device_name(0)
    logger.info("‚úÖ T√¨m th·∫•y %s GPU(s).", gpu_count)
    logger.info("‚úÖ ƒêang s·ª≠ d·ª•ng GPU: %s", gpu_name)

# Render the whole module tree as text so it is stored in the log file too
model_architecture_string = str(model)
logger.info("Ki·∫øn tr√∫c c·ªßa m√¥ h√¨nh:\n%s", model_architecture_string)

# Move the model to the selected device
model.to(device)


2025-10-16 15:21:02 - [INFO] - B∆∞·ªõc 4: Thi·∫øt l·∫≠p m√¥i tr∆∞·ªùng hu·∫•n luy·ªán v√† ki·∫øn tr√∫c model...
2025-10-16 15:21:02 - [INFO] - S·ª≠ d·ª•ng thi·∫øt b·ªã: cuda
2025-10-16 15:21:02 - [INFO] - ‚úÖ T√¨m th·∫•y 1 GPU(s).
2025-10-16 15:21:02 - [INFO] - ‚úÖ ƒêang s·ª≠ d·ª•ng GPU: NVIDIA GeForce RTX 5070 Ti
2025-10-16 15:21:02 - [INFO] - Ki·∫øn tr√∫c c·ªßa m√¥ h√¨nh:
ErnieMForSequenceClassification(
  (ernie_m): ErnieMModel(
    (embeddings): ErnieMEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.02, inplace=False)
    )
    (encoder): ErnieMEncoder(
      (layers): ModuleList(
        (0-23): 24 x ErnieMEncoderLayer(
          (self_attn): ErnieMAttention(
            (self_attn): ErnieMSelfAttention(
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
      

ErnieMForSequenceClassification(
  (ernie_m): ErnieMModel(
    (embeddings): ErnieMEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.02, inplace=False)
    )
    (encoder): ErnieMEncoder(
      (layers): ModuleList(
        (0-23): 24 x ErnieMEncoderLayer(
          (self_attn): ErnieMAttention(
            (self_attn): ErnieMSelfAttention(
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.02, inplace=False)
            )
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (linear1): Linear(in_features=1024, out_features=4096,

In [20]:
# AdamW over all model parameters, configured from cfg.
optimizer_kwargs = {
    "lr": cfg.LEARNING_RATE,
    "weight_decay": cfg.WEIGHT_DECAY,
    "eps": cfg.EPSILON,
}
optimizer = AdamW(model.parameters(), **optimizer_kwargs)

# One optimizer update per accumulation group; a leftover group at the end of
# an epoch still counts as an update, hence the ceil.
num_update_steps_per_epoch = math.ceil(len(train_loader) / gradient_accumulation_steps)
num_training_steps = cfg.EPOCHS * num_update_steps_per_epoch
logger.info(
    f"Scheduler will run for {num_training_steps} total steps"
    f" ({num_update_steps_per_epoch} per epoch)"
)


2025-10-16 15:21:02 - [INFO] - Scheduler will run for 3500 total steps (350 per epoch)


- Cosine Scheduler:
    Sau khi warmup, scheduler sẽ giảm learning rate theo đường cong cosine. Ban đầu nó giảm rất chậm (giữ ở mức LR cao trong thời gian dài hơn), sau đó giảm nhanh dần và cuối cùng lại giảm rất chậm khi về gần 0.
    
    Tại sao nó hiệu quả? Ý tưởng là việc giữ learning rate ở mức cao lâu hơn giúp model có cơ hội "khám phá" và thoát khỏi các điểm tối ưu kém (local minima). Sau đó, việc giảm tốc từ từ ở cuối giúp nó tinh chỉnh một cách chính xác.

In [21]:
# Warm-up length derived from TOTAL_STEP_SCALE:
#   <= 0  -> no warm-up
#   <= 1  -> that fraction of all optimizer steps (at least one step)
#   > 1   -> an absolute step count, capped at the total number of steps
warmup_scale = cfg.TOTAL_STEP_SCALE
warmup_steps = (
    0
    if warmup_scale <= 0
    else max(1, int(warmup_scale * num_training_steps))
    if warmup_scale <= 1
    else min(int(warmup_scale), num_training_steps)
)

scheduler = get_scheduler(
    name=cfg.SCHEDULER_TYPE,
    optimizer=optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=num_training_steps,
)
logger.info(f"Warmup steps: {warmup_steps}")


2025-10-16 15:21:02 - [INFO] - Warmup steps: 350


In [22]:
# Build the loss function on the training device.
# LABEL_SMOOTHING is applied in both branches: it is an independent setting,
# so switching CLASS_WEIGHTS off must not silently switch smoothing off too.
# NOTE(review): evaluate() receives this same loss_fn, so the reported
# validation loss includes the class weights and the label smoothing.
if cfg.CLASS_WEIGHTS:
    logger.info("S·ª≠ d·ª•ng Class Weights & Label smoothing cho h√†m loss.")
    # Turn the configured class weights into a tensor on the device
    class_weights_tensor = torch.tensor(cfg.CLASS_WEIGHTS, dtype=torch.float).to(device)
    loss_fn = torch.nn.CrossEntropyLoss(
        weight=class_weights_tensor,
        label_smoothing=cfg.LABEL_SMOOTHING,
    ).to(device)
else:
    logger.info("S·ª≠ d·ª•ng CrossEntropyLoss th√¥ng th∆∞·ªùng (kh√¥ng c√≥ tr·ªçng s·ªë).")
    loss_fn = torch.nn.CrossEntropyLoss(
        label_smoothing=cfg.LABEL_SMOOTHING,
    ).to(device)


2025-10-16 15:21:02 - [INFO] - S·ª≠ d·ª•ng Class Weights & Label smoothing cho h√†m loss.


## 5. Vòng lặp Huấn luyện

In [23]:
# Training loop: train one epoch, evaluate on the validation set, save the
# model whenever validation Macro-F1 improves, and stop early after
# PATIENCE_LIMIT epochs in a row without improvement.
# NOTE(review): after the loop `model` holds the weights of the last epoch that
# ran; the best weights are the ones saved in MODEL_OUTPUT_DIR.
best_macro_f1 = 0.0
patience_counter = 0  # epochs in a row without a Macro-F1 improvement

# Row names of the classification report, in label-id order (same every epoch)
target_names = [cfg.ID2LABEL[i] for i in range(len(cfg.LABEL_MAP))]

for epoch in range(cfg.EPOCHS):
    logger.info(f"--- Epoch {epoch + 1}/{cfg.EPOCHS} ---")

    avg_train_loss = train_one_epoch(
        model,
        train_loader,
        loss_fn,
        optimizer,
        scheduler,
        device,
        epoch + 1,
        cfg.EPOCHS,
        gradient_accumulation_steps=gradient_accumulation_steps,
    )
    logger.info(f"Loss trung b√¨nh tr√™n t·∫≠p train: {avg_train_loss:.4f}")

    current_lr = optimizer.param_groups[0]["lr"]
    logger.info(
        f"Current Learning Rate: {current_lr:.2e}"
    )  # scientific notation, e.g. 8.00e-06

    # Evaluate on the validation set.
    # Note: this rebinds the module-level `val_labels` to the labels returned
    # by evaluate().
    logger.info("B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...")
    val_labels, val_preds, avg_val_loss = evaluate(model, val_loader, loss_fn, device)

    accuracy = accuracy_score(val_labels, val_preds)
    macro_f1 = f1_score(val_labels, val_preds, average="macro")

    logger.info(f"Validation Loss: {avg_val_loss:.4f}")
    logger.info(f"Validation Accuracy: {accuracy:.4f}")
    logger.info(f"Validation Macro-F1: {macro_f1:.4f}")

    # Detailed per-class report, sent through the logger so it is stored in the
    # log file as well
    report = classification_report(
        val_labels,
        val_preds,
        target_names=target_names,
        digits=4,
    )
    logger.info(f"Classification Report tr√™n t·∫≠p validation:\n{report}")

    # Keep the best model according to Macro-F1
    if macro_f1 > best_macro_f1:
        best_macro_f1 = macro_f1
        patience_counter = 0  # improvement: reset the early-stopping counter

        logger.info(
            f"üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '{cfg.MODEL_OUTPUT_DIR}'..."
        )
        os.makedirs(cfg.MODEL_OUTPUT_DIR, exist_ok=True)

        model.save_pretrained(cfg.MODEL_OUTPUT_DIR)
        tokenizer.save_pretrained(cfg.MODEL_OUTPUT_DIR)
        logger.info("L∆∞u model th√†nh c√¥ng.")
    else:
        patience_counter += 1
        logger.warning(
            f"Macro-F1 kh√¥ng c·∫£i thi·ªán. Patience: {patience_counter}/{cfg.PATIENCE_LIMIT}"
        )
        if patience_counter >= cfg.PATIENCE_LIMIT:
            logger.info("Early stopping! D·ª´ng hu·∫•n luy·ªán.")
            break


2025-10-16 15:21:02 - [INFO] - --- Epoch 1/10 ---


Epoch 1/10:   0%|          | 0/2800 [00:00<?, ?it/s]

2025-10-16 15:27:03 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 1.5303
2025-10-16 15:27:03 - [INFO] - Current Learning Rate: 8.00e-06
2025-10-16 15:27:03 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/700 [00:00<?, ?it/s]

2025-10-16 15:27:32 - [INFO] - Validation Loss: 0.8831
2025-10-16 15:27:32 - [INFO] - Validation Accuracy: 0.6136
2025-10-16 15:27:32 - [INFO] - Validation Macro-F1: 0.6139
2025-10-16 15:27:32 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

   intrinsic     0.5345    0.5061    0.5199       490
   extrinsic     0.6929    0.7245    0.7084       461
          no     0.6101    0.6169    0.6135       449

    accuracy                         0.6136      1400
   macro avg     0.6125    0.6159    0.6139      1400
weighted avg     0.6109    0.6136    0.6120      1400

2025-10-16 15:27:32 - [INFO] - üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '/home/guest/Projects/CS221/models/ernie-m-large-mnli-xnli-tuned'...



Classification Report tr√™n t·∫≠p validation:


2025-10-16 15:27:34 - [INFO] - L∆∞u model th√†nh c√¥ng.
2025-10-16 15:27:34 - [INFO] - --- Epoch 2/10 ---


Epoch 2/10:   0%|          | 0/2800 [00:00<?, ?it/s]

2025-10-16 15:33:41 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 0.7119
2025-10-16 15:33:41 - [INFO] - Current Learning Rate: 7.76e-06
2025-10-16 15:33:41 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/700 [00:00<?, ?it/s]

2025-10-16 15:34:10 - [INFO] - Validation Loss: 0.7283
2025-10-16 15:34:10 - [INFO] - Validation Accuracy: 0.7386
2025-10-16 15:34:10 - [INFO] - Validation Macro-F1: 0.7405
2025-10-16 15:34:10 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

   intrinsic     0.6667    0.7388    0.7009       490
   extrinsic     0.7897    0.7007    0.7425       461
          no     0.7790    0.7773    0.7781       449

    accuracy                         0.7386      1400
   macro avg     0.7451    0.7389    0.7405      1400
weighted avg     0.7432    0.7386    0.7394      1400

2025-10-16 15:34:10 - [INFO] - üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '/home/guest/Projects/CS221/models/ernie-m-large-mnli-xnli-tuned'...



Classification Report tr√™n t·∫≠p validation:


2025-10-16 15:34:12 - [INFO] - L∆∞u model th√†nh c√¥ng.
2025-10-16 15:34:12 - [INFO] - --- Epoch 3/10 ---


Epoch 3/10:   0%|          | 0/2800 [00:00<?, ?it/s]

2025-10-16 15:40:22 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 0.5187
2025-10-16 15:40:22 - [INFO] - Current Learning Rate: 7.06e-06
2025-10-16 15:40:22 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/700 [00:00<?, ?it/s]

2025-10-16 15:40:51 - [INFO] - Validation Loss: 0.7591
2025-10-16 15:40:51 - [INFO] - Validation Accuracy: 0.7500
2025-10-16 15:40:51 - [INFO] - Validation Macro-F1: 0.7508
2025-10-16 15:40:51 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

   intrinsic     0.7261    0.6980    0.7118       490
   extrinsic     0.7437    0.7679    0.7556       461
          no     0.7815    0.7884    0.7849       449

    accuracy                         0.7500      1400
   macro avg     0.7504    0.7514    0.7508      1400
weighted avg     0.7497    0.7500    0.7497      1400

2025-10-16 15:40:51 - [INFO] - üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '/home/guest/Projects/CS221/models/ernie-m-large-mnli-xnli-tuned'...



Classification Report tr√™n t·∫≠p validation:


2025-10-16 15:40:53 - [INFO] - L∆∞u model th√†nh c√¥ng.
2025-10-16 15:40:53 - [INFO] - --- Epoch 4/10 ---


Epoch 4/10:   0%|          | 0/2800 [00:00<?, ?it/s]

2025-10-16 15:47:06 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 0.3588
2025-10-16 15:47:06 - [INFO] - Current Learning Rate: 6.00e-06
2025-10-16 15:47:06 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/700 [00:00<?, ?it/s]

2025-10-16 15:47:34 - [INFO] - Validation Loss: 0.9048
2025-10-16 15:47:34 - [INFO] - Validation Accuracy: 0.7371
2025-10-16 15:47:34 - [INFO] - Validation Macro-F1: 0.7380
2025-10-16 15:47:34 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

   intrinsic     0.6971    0.6857    0.6914       490
   extrinsic     0.7619    0.7289    0.7450       461
          no     0.7547    0.8018    0.7775       449

    accuracy                         0.7371      1400
   macro avg     0.7379    0.7388    0.7380      1400
weighted avg     0.7369    0.7371    0.7367      1400

2025-10-16 15:47:34 - [INFO] - --- Epoch 5/10 ---



Classification Report tr√™n t·∫≠p validation:


Epoch 5/10:   0%|          | 0/2800 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [24]:
# Log two closing messages: a completion notice, then the best validation
# macro-F1 together with the configured model output directory.
_closing_messages = (
    "üèÅ Qu√° tr√¨nh hu·∫•n luy·ªán ho√†n t·∫•t.",
    f"Model t·ªët nh·∫•t v·ªõi Macro-F1 = {best_macro_f1:.4f} ƒë√£ ƒë∆∞·ª£c l∆∞u t·∫°i '{cfg.MODEL_OUTPUT_DIR}'",
)
for _message in _closing_messages:
    logger.info(_message)


2025-10-16 15:47:51 - [INFO] - üèÅ Qu√° tr√¨nh hu·∫•n luy·ªán ho√†n t·∫•t.
2025-10-16 15:47:51 - [INFO] - Model t·ªët nh·∫•t v·ªõi Macro-F1 = 0.7508 ƒë√£ ƒë∆∞·ª£c l∆∞u t·∫°i '/home/guest/Projects/CS221/models/ernie-m-large-mnli-xnli-tuned'
2025-10-16 15:47:51 - [INFO] - Model t·ªët nh·∫•t v·ªõi Macro-F1 = 0.7508 ƒë√£ ƒë∆∞·ª£c l∆∞u t·∫°i '/home/guest/Projects/CS221/models/ernie-m-large-mnli-xnli-tuned'


# Phân phối kết quả đúng/sai theo từng lớp

In [25]:
# Per-class breakdown of correct vs. incorrect validation predictions.
# NOTE(review): val_labels / val_preds hold whatever the most recent evaluate()
# call in the training loop returned, i.e. the last epoch that ran. That is not
# necessarily the epoch whose checkpoint was saved as best; confirm this is the
# model you intend to analyse.
val_label_names = [cfg.ID2LABEL[label_id] for label_id in val_labels]
pred_label_names = [cfg.ID2LABEL[pred_id] for pred_id in val_preds]
evaluation_df = pd.DataFrame(
    {
        "true_label": val_label_names,
        "predicted_label": pred_label_names,
    }
)

# Vectorised comparison of the two label columns instead of a row-wise apply.
evaluation_df["status"] = (
    evaluation_df["true_label"]
    .eq(evaluation_df["predicted_label"])
    .map({True: "correct", False: "incorrect"})
)

distribution_table = (
    evaluation_df.groupby(["true_label", "status"])
    .size()
    .unstack(fill_value=0)
    # unstack only creates columns for statuses that actually occur; force both
    # so the arithmetic below cannot raise KeyError when every prediction is
    # correct (or every prediction is wrong).
    .reindex(columns=["correct", "incorrect"], fill_value=0)
    .rename_axis(None, axis=1)
    .reset_index()
    .sort_values("true_label")
)

# 1. Total number of samples per true label.
distribution_table["total"] = (
    distribution_table["correct"] + distribution_table["incorrect"]
)

# 2./3. Share of correct and incorrect predictions per true label, rendered as
# percentage strings for readability.
for _count_col, _rate_col in (
    ("correct", "correct_rate"),
    ("incorrect", "incorrect_rate"),
):
    distribution_table[_rate_col] = (
        distribution_table[_count_col] / distribution_table["total"]
    ).map("{:.2%}".format)

# Write the table to the log.
logger.info(f"Ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:\n{distribution_table.to_string()}")

# In a notebook, display() renders the table more nicely than plain text.
print("B·∫£ng ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:")
display(distribution_table)


2025-10-16 15:47:52 - [INFO] - Ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:
  true_label  correct  incorrect  total correct_rate incorrect_rate
0  extrinsic      336        125    461       72.89%         27.11%
1  intrinsic      336        154    490       68.57%         31.43%
2         no      360         89    449       80.18%         19.82%


B·∫£ng ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:


Unnamed: 0,true_label,correct,incorrect,total,correct_rate,incorrect_rate
0,extrinsic,336,125,461,72.89%,27.11%
1,intrinsic,336,154,490,68.57%,31.43%
2,no,360,89,449,80.18%,19.82%
