# CONFIGURATION

In [None]:
# Cell [1] - ƒê√É C·∫¨P NH·∫¨T
import os

MODEL_NAMES = [
    "joeddav/xlm-roberta-large-xnli",
    "microsoft/infoxlm-large",
    "uitnlp/CafeBERT",
    "FacebookAI/xlm-roberta-large",
    "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",
    "MoritzLaurer/ernie-m-large-mnli-xnli",
    "microsoft/deberta-xlarge-mnli",
]


class Config:
    ROOT_DIR = os.getcwd()
    DATA_DIR = os.path.join(ROOT_DIR, "data")

    # --- THAY ƒê·ªîI QUAN TR·ªåNG ---
    # Tr·ªè ƒë·∫øn file ƒë√£ ƒë∆∞·ª£c x·ª≠ l√Ω b·∫±ng semantic search
    TRAIN_FILE = os.path.join(DATA_DIR, "vihallu-train.csv")

    TEST_FILE = os.path.join(DATA_DIR, "vihallu-public-test.csv")
    SUBMISSION_DIR = os.path.join(ROOT_DIR, "submission")
    SUBMISSION_CSV = "submit.csv"
    SUBMISSION_ZIP = "submit.zip"

    MODEL_NAME = MODEL_NAMES[2]
    MODEL_OUTPUT_DIR = os.path.join(
        ROOT_DIR, "models", f"{MODEL_NAME.split('/')[-1]}-tuned"
    )

    MAX_LENGTH = 512
    RANDOM_STATE = 42
    EPOCHS = 10
    BATCH_SIZE = 1
    GRADIENT_ACCUMULATION_STEPS = 16
    SCHEDULER_TYPE = "cosine"
    LEARNING_RATE = 8e-6  # Gi·∫£m LR m·ªôt ch√∫t cho PET
    WEIGHT_DECAY = 0.02
    CLASSIFIER_DROPOUT = 0.05
    EPSILON = 1e-8
    PATIENCE_LIMIT = 2
    TOTAL_STEP_SCALE = 0.1
    LABEL_SMOOTHING = 0.05
    VALIDATION_SPLIT_SIZE = 0.2

    # Gi·ªØ nguy√™n mapping ƒë·ªÉ ƒë·ªçc d·ªØ li·ªáu, nh∆∞ng s·∫Ω ƒë∆∞·ª£c √°nh x·∫° l·∫°i trong PET
    LABEL_MAP = {"intrinsic": 0, "extrinsic": 1, "no": 2}
    ID2LABEL = {v: k for k, v in LABEL_MAP.items()}
    CLASS_WEIGHTS = [1.0393466963622866, 1.0114145354717525, 0.9531590413943355]


cfg = Config()


# LOGGER

In [None]:
import logging
import os
from datetime import datetime

# Th∆∞ m·ª•c g·ªëc ƒë·ªÉ l∆∞u t·∫•t c·∫£ c√°c file log
LOG_BASE_DIR = "logs"

# D√πng m·ªôt dictionary ƒë·ªÉ l∆∞u c√°c logger ƒë√£ t·∫°o, tr√°nh vi·ªác t·∫°o l·∫°i v√† g√¢y ra log tr√πng l·∫∑p
_loggers = {}


def setup_logger(model_name: str, log_level=logging.INFO):
    """
    Thi·∫øt l·∫≠p v√† tr·∫£ v·ªÅ m·ªôt logger ƒë·ªÉ ghi log v√†o c·∫£ console v√† file.

    - M·ªói model s·∫Ω c√≥ m·ªôt th∆∞ m·ª•c log ri√™ng d·ª±a tr√™n `model_name`.
    - M·ªói l·∫ßn ch·∫°y s·∫Ω t·∫°o m·ªôt file log m·ªõi c√≥ t√™n l√† timestamp (v√≠ d·ª•: 2023-10-27_15-30-00.log).
    - ƒê·∫£m b·∫£o kh√¥ng c√≥ log n√†o b·ªã ghi ƒë√®.

    Args:
        model_name (str): T√™n c·ªßa model, d√πng ƒë·ªÉ t·∫°o th∆∞ m·ª•c con. V√≠ d·ª•: 'xnli-large-tuned'.
        log_level (int): C·∫•p ƒë·ªô log, m·∫∑c ƒë·ªãnh l√† logging.INFO.

    Returns:
        logging.Logger: Instance c·ªßa logger ƒë√£ ƒë∆∞·ª£c c·∫•u h√¨nh.
    """
    # N·∫øu logger cho model n√†y ƒë√£ t·ªìn t·∫°i, tr·∫£ v·ªÅ n√≥ ngay l·∫≠p t·ª©c
    if model_name in _loggers:
        return _loggers[model_name]

    # X·ª≠ l√Ω t√™n model ƒë·ªÉ an to√†n khi t·∫°o t√™n th∆∞ m·ª•c (thay th·∫ø "/")
    safe_model_name = model_name.replace("/", "_").replace("\\", "_")
    model_log_dir = os.path.join(LOG_BASE_DIR, safe_model_name)
    os.makedirs(model_log_dir, exist_ok=True)

    # T·∫°o logger
    logger = logging.getLogger(safe_model_name)
    logger.setLevel(log_level)

    # NgƒÉn kh√¥ng cho log lan truy·ªÅn ƒë·∫øn root logger ƒë·ªÉ tr√°nh in ra console 2 l·∫ßn
    logger.propagate = False

    # ƒê·ªãnh d·∫°ng cho log message
    formatter = logging.Formatter(
        "%(asctime)s - [%(levelname)s] - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )

    # T·∫°o File Handler ƒë·ªÉ ghi log ra file
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    log_file_path = os.path.join(model_log_dir, f"{timestamp}.log")

    file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
    file_handler.setLevel(log_level)
    file_handler.setFormatter(formatter)

    # T·∫°o Console (Stream) Handler ƒë·ªÉ in log ra m√†n h√¨nh
    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)
    console_handler.setFormatter(formatter)

    # Th√™m c√°c handler v√†o logger
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    # L∆∞u logger v√†o cache
    _loggers[model_name] = logger

    logger.info(
        f"Logger cho '{safe_model_name}' ƒë√£ ƒë∆∞·ª£c kh·ªüi t·∫°o. File log: {log_file_path}"
    )

    return logger


## Setup logger

In [3]:
logger = setup_logger(f"{cfg.MODEL_NAME}-training")
logger.info(f"Logger initialized for {cfg.MODEL_NAME}")

logger.info("=" * 60)
logger.info("üöÄ STARTING TRAINING SESSION")
logger.info("=" * 60)
for key, value in Config.__dict__.items():
    if not key.startswith("__") and not callable(value):
        logger.info(f"{key}: {value}")
logger.info("=" * 60)


2025-10-16 21:44:15 - [INFO] - Logger cho 'uitnlp_CafeBERT-training' ƒë√£ ƒë∆∞·ª£c kh·ªüi t·∫°o. File log: logs/uitnlp_CafeBERT-training/2025-10-16_21-44-15.log
2025-10-16 21:44:15 - [INFO] - Logger initialized for uitnlp/CafeBERT
2025-10-16 21:44:15 - [INFO] - üöÄ STARTING TRAINING SESSION
2025-10-16 21:44:15 - [INFO] - ROOT_DIR: /home/guest/Projects/CS221
2025-10-16 21:44:15 - [INFO] - DATA_DIR: /home/guest/Projects/CS221/data
2025-10-16 21:44:15 - [INFO] - TRAIN_FILE: /home/guest/Projects/CS221/data/vihallu-train.csv
2025-10-16 21:44:15 - [INFO] - TEST_FILE: /home/guest/Projects/CS221/data/vihallu-public-test.csv
2025-10-16 21:44:15 - [INFO] - SUBMISSION_DIR: /home/guest/Projects/CS221/submission
2025-10-16 21:44:15 - [INFO] - SUBMISSION_CSV: submit.csv
2025-10-16 21:44:15 - [INFO] - SUBMISSION_ZIP: submit.zip
2025-10-16 21:44:15 - [INFO] - MODEL_NAME: uitnlp/CafeBERT
2025-10-16 21:44:15 - [INFO] - MODEL_OUTPUT_DIR: /home/guest/Projects/CS221/models/CafeBERT-tuned
2025-10-16 21:44:

## Prepare data

In [None]:
from sklearn.model_selection import train_test_split


def prepare_data(config, logger=None):
    df = pd.read_csv(config.TRAIN_FILE)
    print(f"‚úÖ ƒê·ªçc th√†nh c√¥ng {len(df)} m·∫´u t·ª´ file ƒë√£ x·ª≠ l√Ω: {config.TRAIN_FILE}")

    # T·∫°o 2 c·ªôt premise v√† hypothesis t·ª´ ng·ªØ c·∫£nh (context)
    df["premise"] = (
        "C√¢u h·ªèi: "
        + df["prompt"].astype(str)
        + " Ng·ªØ c·∫£nh: "
        + df["context"].astype(str)
    )
    df["hypothesis"] = df["response"].astype(str)

    df["label_id"] = df["label"].map(config.LABEL_MAP)
    df.dropna(subset=["label_id"], inplace=True)
    df["label_id"] = df["label_id"].astype(int)

    train_df, val_df = train_test_split(
        df,
        test_size=config.VALIDATION_SPLIT_SIZE,
        random_state=config.RANDOM_STATE,
        stratify=df["label_id"],
    )

    if logger:
        logger.info(
            f"Chia d·ªØ li·ªáu: {len(train_df)} m·∫´u train, {len(val_df)} m·∫´u validation."
        )

    # --- PH·∫¶N N√ÇNG C·∫§P: L∆ØU FILE RA TH∆Ø M·ª§C DATA ---
    # T·∫°o th∆∞ m·ª•c 'processed' trong 'data' n·∫øu ch∆∞a c√≥
    processed_data_dir = os.path.join(config.DATA_DIR, "processed")
    os.makedirs(processed_data_dir, exist_ok=True)

    # ƒê·ªãnh nghƒ©a ƒë∆∞·ªùng d·∫´n file
    train_output_path = os.path.join(processed_data_dir, "train_split.csv")
    val_output_path = os.path.join(processed_data_dir, "validation_split.csv")

    # L∆∞u c√°c DataFrame
    train_df.to_csv(train_output_path, index=False, encoding="utf-8-sig")
    val_df.to_csv(val_output_path, index=False, encoding="utf-8-sig")

    print(f"‚úÖ ƒê√£ l∆∞u t·∫≠p train v√†o: {train_output_path}")
    print(f"‚úÖ ƒê√£ l∆∞u t·∫≠p validation v√†o: {val_output_path}")
    # --- K·∫æT TH√öC PH·∫¶N N√ÇNG C·∫§P ---

    return train_df, val_df


# Model (C·∫≠p nh·∫≠t cho Masked Language Modeling)

In [5]:
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForMaskedLM,
)  # <-- THAY ƒê·ªîI ·ªû ƒê√ÇY


def get_model_and_tokenizer(config):
    """T·∫£i pre-trained model v√† tokenizer cho Masked LM."""
    print(f"ƒêang t·∫£i model: {config.MODEL_NAME}")
    tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)

    cfg_model = AutoConfig.from_pretrained(config.MODEL_NAME, trust_remote_code=True)
    print(f"Model config: {cfg_model}")

    # --- THAY ƒê·ªîI QUAN TR·ªåNG ---
    # Chuy·ªÉn t·ª´ SequenceClassification sang MaskedLM
    model = AutoModelForMaskedLM.from_pretrained(
        config.MODEL_NAME,
        config=cfg_model,
    )
    # -------------------------

    return model, tokenizer


# Thi·∫øt l·∫≠p Pattern & Verbalizer cho PET

In [6]:
import torch


def setup_pet_components(tokenizer, logger):
    """
    ƒê·ªãnh nghƒ©a Pattern v√† Verbalizer cho ph∆∞∆°ng ph√°p PET.
    """
    # 1. ƒê·ªãnh nghƒ©a Pattern (M·∫´u c√¢u)
    # --- THAY ƒê·ªîI QUAN TR·ªåNG: ƒê∆∞a {mask} l√™n ƒë·∫ßu ---
    pattern = '{mask}! D·ª±a tr√™n th√¥ng tin: "{premise}", c√¢u tr·∫£ l·ªùi "{hypothesis}" c√≥ ƒë√∫ng kh√¥ng?'
    # B·∫°n c≈©ng c√≥ th·ªÉ th·ª≠ c√°c pattern kh√°c nh∆∞:
    # pattern = "Ph√°n quy·∫øt: {mask}. B·∫±ng ch·ª©ng: \"{premise}\". Gi·∫£ thuy·∫øt: \"{hypothesis}\"."
    # ------------------------------------------------

    logger.info(f"S·ª≠ d·ª•ng Pattern (ƒë√£ s·ª≠a l·ªói): {pattern}")

    # 2. ƒê·ªãnh nghƒ©a Verbalizer (Gi·ªØ nguy√™n)
    verbalizer = {
        2: "chu·∫©n",  # no Thay "ƒë√∫ng" b·∫±ng "chu·∫©n" ho·∫∑c "ch√≠nh x√°c"
        0: "tr·∫≠t",  # intrinsic
        1: "kh√°c",  # extrinsic
    }
    logger.info(f"S·ª≠ d·ª•ng Verbalizer: {verbalizer}")

    # 3. L·∫•y token ID cho c√°c t·ª´ trong verbalizer
    verbalizer_token_ids = {
        label_id: tokenizer.convert_tokens_to_ids(verb)
        for label_id, verb in verbalizer.items()
    }

    # Ki·ªÉm tra xem c√≥ t·ª´ n√†o b·ªã t√°ch th√†nh nhi·ªÅu sub-token kh√¥ng
    for label_id, verb in verbalizer.items():
        tokens = tokenizer.tokenize(verb)
        if len(tokens) > 1:
            logger.warning(f"‚ö†Ô∏è T·ª´ '{verb}' b·ªã t√°ch th√†nh nhi·ªÅu token: {tokens}.")
        else:
            logger.info(
                f"‚úÖ T·ª´ '{verb}' (ID: {verbalizer_token_ids[label_id]}) l√† token ƒë∆°n l·∫ª."
            )

    return pattern, verbalizer, verbalizer_token_ids


# Hallucination Dataset (C·∫≠p nh·∫≠t cho PET)

In [7]:
import torch
from torch.utils.data import Dataset


class HallucinationDataset(Dataset):
    def __init__(self, premises, hypotheses, labels, tokenizer, max_len, pattern):
        self.premises = premises
        self.hypotheses = hypotheses
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.pattern = pattern

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        premise = self.premises[idx]
        hypothesis = self.hypotheses[idx]
        label = self.labels[idx]

        # 1. T·∫°o c√¢u prompt ho√†n ch·ªânh b·∫±ng c√°ch ƒëi·ªÅn premise v√† hypothesis v√†o pattern
        prompt_text = self.pattern.format(
            premise=premise, hypothesis=hypothesis, mask=self.tokenizer.mask_token
        )

        # 2. Tokenize c√¢u prompt ƒë√£ t·∫°o
        encoding = self.tokenizer(
            prompt_text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding=False,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=False,
        )

        return {
            "input_ids": encoding["input_ids"],
            "attention_mask": encoding["attention_mask"],
            "labels": label,  # Tr·∫£ v·ªÅ s·ªë nguy√™n ƒë·ªÉ DataCollator x·ª≠ l√Ω
        }


# Training

In [8]:
import os
import math
import torch
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from tqdm.auto import tqdm
from torch.optim import AdamW
from huggingface_hub import login
from transformers import get_scheduler
from torch.utils.data import DataLoader
from tqdm.contrib.logging import logging_redirect_tqdm
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, accuracy_score, classification_report
from functools import partial


## Train & Evaluate Functions (C·∫≠p nh·∫≠t cho PET)

In [9]:
def train_one_epoch(
    model,
    data_loader,
    loss_fn,
    optimizer,
    scheduler,
    device,
    tokenizer,
    verbalizer_ids_tensor,
    label_map,
    epoch=None,
    total_epochs=None,
    gradient_accumulation_steps=1,
):
    model.train()
    total_loss = 0
    desc = f"Train" if epoch is None else f"Epoch {epoch}/{total_epochs}"
    progress_bar = tqdm(
        data_loader, desc=desc, leave=False, dynamic_ncols=True, mininterval=0.5
    )

    optimizer.zero_grad()
    steps_in_epoch = len(data_loader)
    with logging_redirect_tqdm():
        for step, batch in enumerate(progress_bar):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            original_labels = batch["labels"]
            target_labels = torch.tensor(
                [label_map[l.item()] for l in original_labels], dtype=torch.long
            ).to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            mask_token_indices = (input_ids == tokenizer.mask_token_id).nonzero(
                as_tuple=True
            )[1]
            mask_logits = logits[torch.arange(logits.size(0)), mask_token_indices]
            verbalizer_logits = mask_logits[:, verbalizer_ids_tensor]

            loss = loss_fn(verbalizer_logits, target_labels)
            total_loss += loss.item()
            scaled_loss = loss / gradient_accumulation_steps
            scaled_loss.backward()

            if (step + 1) % gradient_accumulation_steps == 0 or (
                step + 1
            ) == steps_in_epoch:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

            progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})

    return total_loss / len(data_loader)


## Evaluate

In [10]:
def evaluate(
    model,
    data_loader,
    loss_fn,
    device,
    tokenizer,
    verbalizer_ids_tensor,
    label_map,
    id2label_map,
):
    model.eval()
    all_preds = []
    all_labels = []
    total_val_loss = 0
    progress_bar = tqdm(data_loader, desc="Evaluating", leave=False, dynamic_ncols=True)

    with torch.no_grad(), logging_redirect_tqdm():
        for batch in progress_bar:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            original_labels = batch["labels"]
            target_labels = torch.tensor(
                [label_map[l.item()] for l in original_labels], dtype=torch.long
            ).to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            mask_token_indices = (input_ids == tokenizer.mask_token_id).nonzero(
                as_tuple=True
            )[1]
            mask_logits = logits[torch.arange(logits.size(0)), mask_token_indices]
            verbalizer_logits = mask_logits[:, verbalizer_ids_tensor]

            loss = loss_fn(verbalizer_logits, target_labels)
            total_val_loss += loss.item()

            preds_indices = torch.argmax(verbalizer_logits, dim=-1)
            original_preds = [id2label_map[p.item()] for p in preds_indices]

            all_preds.extend(original_preds)
            all_labels.extend(original_labels.cpu().numpy())

    avg_val_loss = total_val_loss / len(data_loader)
    return all_labels, all_preds, avg_val_loss


# Main

In [11]:
# T·∫£i bi·∫øn m√¥i tr∆∞·ªùng t·ª´ file envs/.env.
dotenv_path = os.path.join(os.getcwd(), "envs", ".env")
load_dotenv(dotenv_path)
print(f"dotenv_path: {dotenv_path}")


dotenv_path: /home/guest/Projects/CS221/envs/.env


In [12]:
# l·∫•y HF token ƒë·ªÉ login
hf_token = os.getenv("HUGGING_FACE_TOKEN")

if hf_token:
    print("INFO: T√¨m th·∫•y HUGGING_FACE_TOKEN. ƒêang ƒëƒÉng nh·∫≠p...")
    login(token=hf_token)
    print("INFO: ƒêƒÉng nh·∫≠p Hugging Face th√†nh c√¥ng.")
else:
    print(
        "WARNING: Kh√¥ng t√¨m th·∫•y HUGGING_FACE_TOKEN trong file .env. M·ªôt s·ªë model c√≥ th·ªÉ y√™u c·∫ßu ƒëƒÉng nh·∫≠p."
    )


INFO: T√¨m th·∫•y HUGGING_FACE_TOKEN. ƒêang ƒëƒÉng nh·∫≠p...
INFO: ƒêƒÉng nh·∫≠p Hugging Face th√†nh c√¥ng.


# ==============================================================================
# 1. Chu·∫©n b·ªã d·ªØ li·ªáu
# ==============================================================================

In [13]:
logger.info("B·∫Øt ƒë·∫ßu pipeline hu·∫•n luy·ªán PET.")
logger.info("B∆∞·ªõc 1: Chu·∫©n b·ªã d·ªØ li·ªáu...")
train_df, val_df = prepare_data(cfg, logger=logger)


2025-10-16 21:44:18 - [INFO] - B·∫Øt ƒë·∫ßu pipeline hu·∫•n luy·ªán PET.
2025-10-16 21:44:18 - [INFO] - B∆∞·ªõc 1: Chu·∫©n b·ªã d·ªØ li·ªáu...
2025-10-16 21:44:18 - [INFO] - Chia d·ªØ li·ªáu: 5600 m·∫´u train, 1400 m·∫´u validation.


‚úÖ ƒê·ªçc th√†nh c√¥ng 7000 m·∫´u t·ª´ file ƒë√£ x·ª≠ l√Ω: /home/guest/Projects/CS221/data/vihallu-train.csv
‚úÖ ƒê√£ l∆∞u t·∫≠p train v√†o: /home/guest/Projects/CS221/data/processed/train_split.csv
‚úÖ ƒê√£ l∆∞u t·∫≠p validation v√†o: /home/guest/Projects/CS221/data/processed/validation_split.csv


# ==============================================================================
# 2. T·∫£i model, tokenizer v√† thi·∫øt l·∫≠p PET
# ==============================================================================

In [14]:
logger.info(f"B∆∞·ªõc 2: T·∫£i model '{cfg.MODEL_NAME}' v√† tokenizer...")
model, tokenizer = get_model_and_tokenizer(cfg)

logger.info(f"Thi·∫øt l·∫≠p c√°c th√†nh ph·∫ßn cho PET...")
pattern, verbalizer, verbalizer_token_ids = setup_pet_components(tokenizer, logger)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

verbalizer_ids_tensor = torch.tensor(
    [verbalizer_token_ids[i] for i in sorted(verbalizer_token_ids.keys())]
).to(device)

original_label_to_verbalizer_idx = {
    label: i for i, label in enumerate(sorted(verbalizer_token_ids.keys()))
}
verbalizer_idx_to_original_label = {
    i: label for i, label in enumerate(sorted(verbalizer_token_ids.keys()))
}


2025-10-16 21:44:18 - [INFO] - B∆∞·ªõc 2: T·∫£i model 'uitnlp/CafeBERT' v√† tokenizer...


ƒêang t·∫£i model: uitnlp/CafeBERT


2025-10-16 21:44:20 - [INFO] - Thi·∫øt l·∫≠p c√°c th√†nh ph·∫ßn cho PET...
2025-10-16 21:44:20 - [INFO] - S·ª≠ d·ª•ng Pattern (ƒë√£ s·ª≠a l·ªói): {mask}! D·ª±a tr√™n th√¥ng tin: "{premise}", c√¢u tr·∫£ l·ªùi "{hypothesis}" c√≥ ƒë√∫ng kh√¥ng?
2025-10-16 21:44:20 - [INFO] - S·ª≠ d·ª•ng Verbalizer: {2: 'chu·∫©n', 0: 'tr·∫≠t', 1: 'kh√°c'}
2025-10-16 21:44:20 - [INFO] - ‚úÖ T·ª´ 'chu·∫©n' (ID: 3) l√† token ƒë∆°n l·∫ª.
2025-10-16 21:44:20 - [INFO] - ‚úÖ T·ª´ 'tr·∫≠t' (ID: 3) l√† token ƒë∆°n l·∫ª.
2025-10-16 21:44:20 - [INFO] - ‚úÖ T·ª´ 'kh√°c' (ID: 3) l√† token ƒë∆°n l·∫ª.


Model config: XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "dtype": "float32",
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.57.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}



# ==============================================================================
# 3. T·∫°o Dataset v√† DataLoader
# ==============================================================================

In [15]:
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding

logger.info("B∆∞·ªõc 3: T·∫°o Dataset v√† DataLoader...")
train_dataset = HallucinationDataset(
    premises=train_df["premise"].to_list(),
    hypotheses=train_df["hypothesis"].to_list(),
    labels=train_df["label_id"].to_list(),
    tokenizer=tokenizer,
    max_len=cfg.MAX_LENGTH,
    pattern=pattern,  # <-- Truy·ªÅn pattern v√†o
)
val_dataset = HallucinationDataset(
    premises=val_df["premise"].to_list(),
    hypotheses=val_df["hypothesis"].to_list(),
    labels=val_df["label_id"].to_list(),
    tokenizer=tokenizer,
    max_len=cfg.MAX_LENGTH,
    pattern=pattern,  # <-- Truy·ªÅn pattern v√†o
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

train_loader = DataLoader(
    train_dataset, batch_size=cfg.BATCH_SIZE, shuffle=True, collate_fn=data_collator
)
val_loader = DataLoader(
    val_dataset, batch_size=cfg.BATCH_SIZE, collate_fn=data_collator
)
logger.info("‚úÖ T·∫°o DataLoader th√†nh c√¥ng v·ªõi DataCollatorWithPadding chu·∫©n!")


2025-10-16 21:44:21 - [INFO] - B∆∞·ªõc 3: T·∫°o Dataset v√† DataLoader...
2025-10-16 21:44:21 - [INFO] - ‚úÖ T·∫°o DataLoader th√†nh c√¥ng v·ªõi DataCollatorWithPadding chu·∫©n!
2025-10-16 21:44:21 - [INFO] - ‚úÖ T·∫°o DataLoader th√†nh c√¥ng v·ªõi DataCollatorWithPadding chu·∫©n!


### Check dataset

In [16]:
print("\n--- Ki·ªÉm tra 1 batch d·ªØ li·ªáu ƒë·∫ßu v√†o ---")
sample_batch = next(iter(train_loader))

print("K√≠ch th∆∞·ªõc input_ids:", sample_batch["input_ids"].shape)
print("K√≠ch th∆∞·ªõc attention_mask:", sample_batch["attention_mask"].shape)
print("Nh√£n trong batch:", sample_batch["labels"])

# Gi·∫£i m√£ m·ªôt m·∫´u ƒë·ªÉ xem n√≥ tr√¥ng nh∆∞ th·∫ø n√†o
decoded_text = tokenizer.decode(sample_batch["input_ids"][0], skip_special_tokens=False)
print("\nM·ªôt m·∫´u ƒë√£ ƒë∆∞·ª£c token h√≥a v√† gi·∫£i m√£ l·∫°i:")
print(decoded_text)
print("------------------------------------------\n")
# --- K·∫æT TH√öC B∆Ø·ªöC KI·ªÇM TRA ---



--- Ki·ªÉm tra 1 batch d·ªØ li·ªáu ƒë·∫ßu v√†o ---
K√≠ch th∆∞·ªõc input_ids: torch.Size([1, 269])
K√≠ch th∆∞·ªõc attention_mask: torch.Size([1, 269])
Nh√£n trong batch: tensor([2])

M·ªôt m·∫´u ƒë√£ ƒë∆∞·ª£c token h√≥a v√† gi·∫£i m√£ l·∫°i:
<s><mask> ! D·ª±a tr√™n th√¥ng tin: "C√¢u h·ªèi: NƒÉm 2012, t·ªâ tr·ªçng ƒë·∫ßu t∆∞ c·ªßa M·ªπ v√†o R&D l√† 33%, ƒë√∫ng kh√¥ng, v√† ƒëi·ªÅu n√†y cho th·∫•y s·ª± gia tƒÉng v∆∞·ª£t b·∫≠c so v·ªõi c√°c nƒÉm tr∆∞·ªõc ƒë√≥? Ng·ªØ c·∫£nh: D√π gi√° tr·ªã ƒë·∫ßu t∆∞ v√†o R&D t·∫°i M·ªπ cao, n√≥ v·∫´n ch∆∞a ƒë√°p ·ª©ng ƒë∆∞·ª£c m·ª•c ti√™u m√† t·ªïng th·ªëng Obama ƒë·ªÅ ra l√† 3% GDP v√†o th·ªùi ƒëi·ªÉm cu·ªëi nhi·ªám k·ª≥ nƒÉm 2016. S·ª± ƒë·ªôc t√¥n c·ªßa Hoa K·ª≥ trong lƒ©nh v·ª±c n√†y ƒëang b·ªã suy gi·∫£m, th·∫≠m ch√≠ v·ªõi c√°c qu·ªëc gia kh√°c, nh∆∞ Trung Qu·ªëc, ƒëang ƒë·∫©y c√°c ho·∫°t ƒë·ªông t√†i tr·ª£ R&D c·ªßa h·ªçc l√™n m·ª©c ƒë·ªô m·ªõi. T·ª´ nƒÉm 2009 ƒë·∫øn 2012, t·ª∑ tr·ªçng t·ªïng ƒë·∫ßu t∆∞ c·ªßa Hoa K·ª≥ v√†o R&D so v·ªõi th·∫ø gi·ªõi gi

In [17]:
print("\n--- Ki·ªÉm tra chi ti·∫øt 5 m·∫´u ƒë·∫ßu ti√™n ƒë·ªÉ so s√°nh tr∆∞·ªõc v√† sau khi x·ª≠ l√Ω ---")

# L·∫•y 5 m·∫´u ƒë·∫ßu ti√™n t·ª´ DataFrame g·ªëc ƒë·ªÉ so s√°nh
num_samples_to_check = 5
for i in range(num_samples_to_check):
    print(f"\n=============== M·∫™U {i} ===============")

    # 1. L·∫•y d·ªØ li·ªáu g·ªëc t·ª´ DataFrame
    original_premise = train_df["premise"].iloc[i]
    original_hypothesis = train_df["hypothesis"].iloc[i]
    # N·ªëi 2 chu·ªói l·∫°i gi·ªëng c√°ch tokenizer s·∫Ω th·∫•y ch√∫ng
    original_combined_text = original_premise + " [SEP] " + original_hypothesis

    # 2. L·∫•y d·ªØ li·ªáu ƒë√£ ƒë∆∞·ª£c x·ª≠ l√Ω t·ª´ Dataset
    processed_sample = train_dataset[i]
    processed_input_ids = processed_sample["input_ids"]

    # 3. Gi·∫£i m√£ (decode) c√°c input_ids ƒë√£ x·ª≠ l√Ω tr·ªü l·∫°i th√†nh vƒÉn b·∫£n
    decoded_text = tokenizer.decode(processed_input_ids, skip_special_tokens=False)

    # 4. So s√°nh v√† in k·∫øt qu·∫£
    original_token_count = len(tokenizer.encode(original_premise, original_hypothesis))
    processed_token_count = len(processed_input_ids)

    print(f"S·ªë token g·ªëc (∆∞·ªõc t√≠nh): {original_token_count}")
    print(
        f"S·ªë token sau khi x·ª≠ l√Ω (gi·ªõi h·∫°n b·ªüi max_len={cfg.MAX_LENGTH}): {processed_token_count}"
    )

    if original_token_count > cfg.MAX_LENGTH:
        print("‚ö†Ô∏è  C·∫¢NH B√ÅO: M·∫´u n√†y ƒë√£ b·ªã c·∫Øt b·ªõt (truncated)!")
    else:
        print("‚úÖ  OK: ƒê·ªô d√†i m·∫´u n·∫±m trong gi·ªõi h·∫°n, kh√¥ng b·ªã c·∫Øt.")

    print("\n--- VƒÉn b·∫£n G·ªêC  ---")
    print(original_combined_text)

    print("\n--- VƒÉn b·∫£n SAU KHI DECODE t·ª´ input_ids ---")
    print(decoded_text)

print("\n===========================================")
print(
    "Ki·ªÉm tra ho√†n t·∫•t. H√£y so s√°nh vƒÉn b·∫£n tr√™n ƒë·ªÉ xem c√≥ s·ª± kh√°c bi·ªát ·ªü cu·ªëi chu·ªói kh√¥ng."
)



--- Ki·ªÉm tra chi ti·∫øt 5 m·∫´u ƒë·∫ßu ti√™n ƒë·ªÉ so s√°nh tr∆∞·ªõc v√† sau khi x·ª≠ l√Ω ---

S·ªë token g·ªëc (∆∞·ªõc t√≠nh): 216
S·ªë token sau khi x·ª≠ l√Ω (gi·ªõi h·∫°n b·ªüi max_len=512): 235
‚úÖ  OK: ƒê·ªô d√†i m·∫´u n·∫±m trong gi·ªõi h·∫°n, kh√¥ng b·ªã c·∫Øt.

--- VƒÉn b·∫£n G·ªêC  ---
C√¢u h·ªèi: N∆∞·ªõc ch·∫•m ƒë∆∞·ª£c pha tr·ªôn b·ªüi nh·ªØng nguy√™n li·ªáu g√¨? Ng·ªØ c·∫£nh: ·∫®m th·ª±c Vi·ªát Nam ƒë·∫∑c tr∆∞ng v·ªõi vi·ªác s·ª≠ d·ª•ng r·∫•t nhi·ªÅu lo·∫°i m·∫Øm, n∆∞·ªõc ch·∫•m t·ª´ lo√£ng ƒë·∫øn ƒë·∫∑c. M·∫Øm, n∆∞·ªõc ch·∫•m c√≥ th·ªÉ d√πng nguy√™n ch·∫•t, c√≥ th·ªÉ ch∆∞ng l√™n ho·∫∑c pha ch·∫ø, ph·ªëi tr·ªôn v·ªõi ·ªõt, g·ª´ng ho·∫∑c t·ªèi, h·∫°t ti√™u, ƒë∆∞·ªùng, chanh ho·∫∑c gi·∫•m. Ng∆∞·ªùi s√†nh n·ªôi tr·ª£ th∆∞·ªùng c√≥ kinh nghi·ªám ƒë·∫∑c bi·ªát ƒë·ªÉ pha ch·∫ø n∆∞·ªõc ch·∫•m t√πy theo m√≥n ƒÉn. Th·∫≠m ch√≠, c√πng nguy√™n li·ªáu l√† n∆∞·ªõc m·∫Øm, d·∫•m, ƒë∆∞·ªùng, t·ªèi, ·ªõt, d√πng ƒë·ªÉ ƒÉn v·ªõi m√≥n g√¨ th√¨ t·ª∑ l·ªá c√°c th√†nh ph·∫ßn pha ch·∫ø c≈©ng kh√

# ==============================================================================
# 4. Thi·∫øt l·∫≠p Hu·∫•n luy·ªán
# ==============================================================================

In [18]:
logger.info("B∆∞·ªõc 4: Thi·∫øt l·∫≠p optimizer, scheduler v√† loss function...")
optimizer = AdamW(
    model.parameters(),
    lr=cfg.LEARNING_RATE,
    weight_decay=cfg.WEIGHT_DECAY,
    eps=cfg.EPSILON,
)

gradient_accumulation_steps = max(1, cfg.GRADIENT_ACCUMULATION_STEPS)
num_update_steps_per_epoch = math.ceil(len(train_loader) / gradient_accumulation_steps)
num_training_steps = num_update_steps_per_epoch * cfg.EPOCHS
logger.info(
    f"Scheduler s·∫Ω ch·∫°y trong {num_training_steps} b∆∞·ªõc ({num_update_steps_per_epoch} b∆∞·ªõc/epoch)"
)

warmup_steps = max(1, int(cfg.TOTAL_STEP_SCALE * num_training_steps))
scheduler = get_scheduler(
    cfg.SCHEDULER_TYPE,
    optimizer=optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=num_training_steps,
)
logger.info(f"Warmup steps: {warmup_steps}")

class_weights_tensor = torch.tensor(cfg.CLASS_WEIGHTS, dtype=torch.float).to(device)
loss_fn = torch.nn.CrossEntropyLoss(
    weight=class_weights_tensor, label_smoothing=cfg.LABEL_SMOOTHING
).to(device)
logger.info("S·ª≠ d·ª•ng Class Weights & Label smoothing cho h√†m loss.")


2025-10-16 21:44:21 - [INFO] - B∆∞·ªõc 4: Thi·∫øt l·∫≠p optimizer, scheduler v√† loss function...
2025-10-16 21:44:21 - [INFO] - Scheduler s·∫Ω ch·∫°y trong 3500 b∆∞·ªõc (350 b∆∞·ªõc/epoch)
2025-10-16 21:44:21 - [INFO] - Warmup steps: 350
2025-10-16 21:44:21 - [INFO] - S·ª≠ d·ª•ng Class Weights & Label smoothing cho h√†m loss.
2025-10-16 21:44:21 - [INFO] - Scheduler s·∫Ω ch·∫°y trong 3500 b∆∞·ªõc (350 b∆∞·ªõc/epoch)
2025-10-16 21:44:21 - [INFO] - Warmup steps: 350
2025-10-16 21:44:21 - [INFO] - S·ª≠ d·ª•ng Class Weights & Label smoothing cho h√†m loss.


# ==============================================================================
# 5. V√≤ng l·∫∑p Hu·∫•n luy·ªán
# ==============================================================================

In [19]:
logger.info("B∆∞·ªõc 5: B·∫Øt ƒë·∫ßu v√≤ng l·∫∑p hu·∫•n luy·ªán...")
best_macro_f1 = 0.0
patience_counter = 0

for epoch in range(cfg.EPOCHS):
    logger.info(f"--- Epoch {epoch + 1}/{cfg.EPOCHS} ---")

    avg_train_loss = train_one_epoch(
        model,
        train_loader,
        loss_fn,
        optimizer,
        scheduler,
        device,
        tokenizer=tokenizer,
        verbalizer_ids_tensor=verbalizer_ids_tensor,
        label_map=original_label_to_verbalizer_idx,
        epoch=epoch + 1,
        total_epochs=cfg.EPOCHS,
        gradient_accumulation_steps=gradient_accumulation_steps,
    )
    logger.info(f"Loss trung b√¨nh tr√™n t·∫≠p train: {avg_train_loss:.4f}")
    logger.info(f"Current Learning Rate: {optimizer.param_groups[0]['lr']:.2e}")

    logger.info("B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...")
    val_labels, val_preds, avg_val_loss = evaluate(
        model,
        val_loader,
        loss_fn,
        device,
        tokenizer=tokenizer,
        verbalizer_ids_tensor=verbalizer_ids_tensor,
        label_map=original_label_to_verbalizer_idx,
        id2label_map=verbalizer_idx_to_original_label,
    )

    accuracy = accuracy_score(val_labels, val_preds)
    macro_f1 = f1_score(val_labels, val_preds, average="macro")

    logger.info(f"Validation Loss: {avg_val_loss:.4f}")
    logger.info(f"Validation Accuracy: {accuracy:.4f}")
    logger.info(f"Validation Macro-F1: {macro_f1:.4f}")

    report = classification_report(
        val_labels,
        val_preds,
        target_names=[cfg.ID2LABEL[i] for i in range(len(cfg.LABEL_MAP))],
        digits=4,
    )
    logger.info(f"Classification Report tr√™n t·∫≠p validation:\n{report}")

    if macro_f1 > best_macro_f1:
        best_macro_f1 = macro_f1
        patience_counter = 0
        logger.info(
            f"üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '{cfg.MODEL_OUTPUT_DIR}'..."
        )
        os.makedirs(cfg.MODEL_OUTPUT_DIR, exist_ok=True)
        model.save_pretrained(cfg.MODEL_OUTPUT_DIR)
        tokenizer.save_pretrained(cfg.MODEL_OUTPUT_DIR)
        logger.info("L∆∞u model th√†nh c√¥ng.")
    else:
        patience_counter += 1
        logger.warning(
            f"Macro-F1 kh√¥ng c·∫£i thi·ªán. Patience: {patience_counter}/{cfg.PATIENCE_LIMIT}"
        )
        if patience_counter >= cfg.PATIENCE_LIMIT:
            logger.info("Early stopping! D·ª´ng hu·∫•n luy·ªán.")
            break

logger.info("üèÅ Qu√° tr√¨nh hu·∫•n luy·ªán ho√†n t·∫•t.")
logger.info(
    f"Model t·ªët nh·∫•t v·ªõi Macro-F1 = {best_macro_f1:.4f} ƒë√£ ƒë∆∞·ª£c l∆∞u t·∫°i '{cfg.MODEL_OUTPUT_DIR}'"
)


2025-10-16 21:44:21 - [INFO] - B∆∞·ªõc 5: B·∫Øt ƒë·∫ßu v√≤ng l·∫∑p hu·∫•n luy·ªán...
2025-10-16 21:44:21 - [INFO] - --- Epoch 1/10 ---


Epoch 1/10:   0%|          | 0/5600 [00:00<?, ?it/s]

2025-10-16 21:52:11 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 1.0986
2025-10-16 21:52:11 - [INFO] - Current Learning Rate: 8.00e-06
2025-10-16 21:52:11 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/1400 [00:00<?, ?it/s]

2025-10-16 21:52:49 - [INFO] - Validation Loss: 1.0986
2025-10-16 21:52:49 - [INFO] - Validation Accuracy: 0.3500
2025-10-16 21:52:49 - [INFO] - Validation Macro-F1: 0.1728
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
2025-10-16 21:52:49 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

   intrinsic     0.3500    1.0000    0.5185       490
   extrinsic     0.0000    0.0000    0.0000       461
          no     0.0000    0.0000    0.0000       449

    accuracy                         0.3500      1400
   macro avg     0.1167    0.3333    0.1728      1400
weighted avg     0.1225    0.3500    0.1815      1400

2025-10-16 21:52:49 - [INFO] - üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '/home/guest/Projects/CS221/models/CafeBERT

Epoch 2/10:   0%|          | 0/5600 [00:00<?, ?it/s]

2025-10-16 22:00:49 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 1.0986
2025-10-16 22:00:49 - [INFO] - Current Learning Rate: 7.76e-06
2025-10-16 22:00:49 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/1400 [00:00<?, ?it/s]

2025-10-16 22:01:26 - [INFO] - Validation Loss: 1.0986
2025-10-16 22:01:26 - [INFO] - Validation Accuracy: 0.3500
2025-10-16 22:01:26 - [INFO] - Validation Macro-F1: 0.1728
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
2025-10-16 22:01:26 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

   intrinsic     0.3500    1.0000    0.5185       490
   extrinsic     0.0000    0.0000    0.0000       461
          no     0.0000    0.0000    0.0000       449

    accuracy                         0.3500      1400
   macro avg     0.1167    0.3333    0.1728      1400
weighted avg     0.1225    0.3500    0.1815      1400

2025-10-16 22:01:26 - [INFO] - --- Epoch 3/10 ---


Epoch 3/10:   0%|          | 0/5600 [00:00<?, ?it/s]

2025-10-16 22:09:33 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 1.0986
2025-10-16 22:09:33 - [INFO] - Current Learning Rate: 7.06e-06
2025-10-16 22:09:33 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/1400 [00:00<?, ?it/s]

2025-10-16 22:10:11 - [INFO] - Validation Loss: 1.0986
2025-10-16 22:10:11 - [INFO] - Validation Accuracy: 0.3500
2025-10-16 22:10:11 - [INFO] - Validation Macro-F1: 0.1728
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
2025-10-16 22:10:11 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

   intrinsic     0.3500    1.0000    0.5185       490
   extrinsic     0.0000    0.0000    0.0000       461
          no     0.0000    0.0000    0.0000       449

    accuracy                         0.3500      1400
   macro avg     0.1167    0.3333    0.1728      1400
weighted avg     0.1225    0.3500    0.1815      1400

2025-10-16 22:10:11 - [INFO] - Early stopping! D·ª´ng hu·∫•n luy·ªán.
2025-10-16 22:10:11 - [INFO] - üèÅ Qu√° tr√¨nh hu·∫•n luy·ªán ho√†n

# Ph√¢n ph·ªëi k·∫øt qu·∫£ ƒë√∫ng/sai theo t·ª´ng l·ªõp

In [20]:
val_label_names = [cfg.ID2LABEL[label_id] for label_id in val_labels]
pred_label_names = [cfg.ID2LABEL[pred_id] for pred_id in val_preds]
evaluation_df = pd.DataFrame(
    {
        "true_label": val_label_names,
        "predicted_label": pred_label_names,
    }
)
evaluation_df["status"] = evaluation_df.apply(
    lambda row: (
        "correct" if row["true_label"] == row["predicted_label"] else "incorrect"
    ),
    axis=1,
)
distribution_table = (
    evaluation_df.groupby(["true_label", "status"])
    .size()
    .unstack(fill_value=0)
    .rename_axis(None, axis=1)
    .reset_index()
    .sort_values("true_label")
)

# 1. Th√™m c·ªôt 'total' b·∫±ng c√°ch c·ªông c·ªôt 'correct' v√† 'incorrect'
distribution_table["total"] = (
    distribution_table["correct"] + distribution_table["incorrect"]
)

# 2. Th√™m c·ªôt t·ªâ l·ªá ƒë√∫ng (correct_rate)
distribution_table["correct_rate"] = (
    distribution_table["correct"] / distribution_table["total"]
)

# 3. Th√™m c·ªôt t·ªâ l·ªá sai (incorrect_rate)
distribution_table["incorrect_rate"] = (
    distribution_table["incorrect"] / distribution_table["total"]
)

# (T√πy ch·ªçn) Format c√°c c·ªôt t·ªâ l·ªá th√†nh d·∫°ng ph·∫ßn trƒÉm cho d·ªÖ ƒë·ªçc
distribution_table["correct_rate"] = distribution_table["correct_rate"].map(
    "{:.2%}".format
)
distribution_table["incorrect_rate"] = distribution_table["incorrect_rate"].map(
    "{:.2%}".format
)

# In ra b·∫£ng k·∫øt qu·∫£
logger.info(f"Ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:\n{distribution_table.to_string()}")

# Trong notebook, d√πng display() s·∫Ω cho b·∫£ng ƒë·∫πp h∆°n
print("B·∫£ng ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:")
display(distribution_table)


2025-10-16 22:10:11 - [INFO] - Ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:
  true_label  correct  incorrect  total correct_rate incorrect_rate
0  extrinsic        0        461    461        0.00%        100.00%
1  intrinsic      490          0    490      100.00%          0.00%
2         no        0        449    449        0.00%        100.00%


B·∫£ng ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:


Unnamed: 0,true_label,correct,incorrect,total,correct_rate,incorrect_rate
0,extrinsic,0,461,461,0.00%,100.00%
1,intrinsic,490,0,490,100.00%,0.00%
2,no,0,449,449,0.00%,100.00%
