# CONFIGURATION

In [None]:
# Cell [1] - ĐÃ CẬP NHẬT
import os

# Candidate Hugging Face checkpoint identifiers for this experiment.
# NOTE: Config.MODEL_NAME selects an entry by position, so inserting or
# reordering entries changes which checkpoint gets trained.
MODEL_NAMES = [
    "joeddav/xlm-roberta-large-xnli",
    "microsoft/infoxlm-large",
    "uitnlp/CafeBERT",
    "FacebookAI/xlm-roberta-large",
    "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",
    "MoritzLaurer/ernie-m-large-mnli-xnli",
    "microsoft/deberta-xlarge-mnli",
]


class Config:
    """Static settings for one training run: paths, model choice, hyperparameters, labels.

    Every value is a class attribute evaluated once, when the class body runs.
    The logger-setup cell iterates ``Config.__dict__`` and logs each
    non-dunder, non-callable attribute, so attribute order here is the order
    in which settings appear in the log.
    """

    # Paths are resolved against the working directory at class-definition time.
    ROOT_DIR = os.getcwd()
    DATA_DIR = os.path.join(ROOT_DIR, "data")

    # --- IMPORTANT CHANGE ---
    # NOTE(review): the original note says this should point to the file that
    # was pre-processed with semantic search — confirm this file name is it.
    TRAIN_FILE = os.path.join(DATA_DIR, "vihallu-train.csv")

    TEST_FILE = os.path.join(DATA_DIR, "vihallu-public-test.csv")
    SUBMISSION_DIR = os.path.join(ROOT_DIR, "submission")
    SUBMISSION_CSV = "submit.csv"
    SUBMISSION_ZIP = "submit.zip"

    # Index 3 of MODEL_NAMES is "FacebookAI/xlm-roberta-large".
    MODEL_NAME = MODEL_NAMES[3]
    # <ROOT_DIR>/models/<last path segment of MODEL_NAME>-tuned
    MODEL_OUTPUT_DIR = os.path.join(
        ROOT_DIR, "models", f"{MODEL_NAME.split('/')[-1]}-tuned"
    )

    # Passed to the dataset as the tokenizer's max_length.
    MAX_LENGTH = 512
    # Seed for the train/validation split.
    RANDOM_STATE = 42
    EPOCHS = 10
    # Per-step batch size; the effective batch size is
    # BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS.
    BATCH_SIZE = 4
    GRADIENT_ACCUMULATION_STEPS = 4
    # "cosine_with_restarts" gets a dedicated scheduler; any other value is
    # handed to transformers.get_scheduler unchanged.
    SCHEDULER_TYPE = "constant_with_warmup"
    LEARNING_RATE = 6e-6
    WEIGHT_DECAY = 0.01
    # Only passed to the scheduler when SCHEDULER_TYPE == "cosine_with_restarts".
    NUM_CYCLES = 3
    # get_model_and_tokenizer assigns this to every nn.Dropout module.
    CLASSIFIER_DROPOUT = 0.1
    LABEL_SMOOTHING = 0.05
    # Warmup setting read by the scheduler cell: <= 0 disables warmup, a value
    # in (0, 1] is a fraction of the total optimizer steps, and a value > 1 is
    # an absolute number of warmup steps.
    TOTAL_STEP_SCALE = 0.1
    # Passed to AdamW as `eps`.
    EPSILON = 1e-8

    PATIENCE_LIMIT = 3
    # Fraction of the training file held out for validation.
    VALIDATION_SPLIT_SIZE = 0.2

    # Label string -> class id, and the inverse mapping.
    LABEL_MAP = {"no": 0, "extrinsic": 1, "intrinsic": 2}
    ID2LABEL = {v: k for k, v in LABEL_MAP.items()}
    # presumably one weight per class id, in LABEL_MAP id order — verify
    # against the loss setup.
    CLASS_WEIGHTS = [1.0393466963622866, 1.0114145354717525, 0.9531590413943355]


# Shared settings instance used by the cells below.
cfg = Config()


# LOGGER

In [None]:
import logging
import os
from datetime import datetime

# Thư mục gốc để lưu tất cả các file log
LOG_BASE_DIR = "logs"

# Cache of loggers already configured by setup_logger, keyed by the
# `model_name` argument, so repeated calls return the same logger.
_loggers = {}


def setup_logger(model_name: str, log_level=logging.INFO):
    """Create (or fetch) a logger that writes to both the console and a file.

    - Each model gets its own sub-directory of ``LOG_BASE_DIR`` named after
      ``model_name`` with path separators replaced by underscores.
    - Each configuration creates a log file named after the current timestamp
      (for example ``2023-10-27_15-30-00.log``).
    - Handlers left on the underlying ``logging`` logger by an earlier
      configuration are removed and closed first. ``logging.getLogger`` hands
      back the same object for the same name, so without this a re-run of the
      notebook cell (which resets ``_loggers``) would stack a second handler
      pair and every message would be emitted twice.

    Args:
        model_name (str): Model name used for the sub-directory and logger
            name, e.g. ``'xnli-large-tuned'``.
        log_level (int): Logging level, ``logging.INFO`` by default. Ignored
            when a cached logger is returned.

    Returns:
        logging.Logger: The configured logger instance.
    """
    # Return the cached logger right away if this name was configured before.
    if model_name in _loggers:
        return _loggers[model_name]

    # Make the model name safe for use as a directory name (replace "/" and "\").
    safe_model_name = model_name.replace("/", "_").replace("\\", "_")
    model_log_dir = os.path.join(LOG_BASE_DIR, safe_model_name)
    os.makedirs(model_log_dir, exist_ok=True)

    logger = logging.getLogger(safe_model_name)
    logger.setLevel(log_level)

    # Do not propagate to the root logger, to avoid printing to the console twice.
    logger.propagate = False

    # Drop handlers from a previous configuration of this same logger object.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter(
        "%(asctime)s - [%(levelname)s] - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )

    # File handler: one timestamped file per configuration.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    log_file_path = os.path.join(model_log_dir, f"{timestamp}.log")

    file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
    file_handler.setLevel(log_level)
    file_handler.setFormatter(formatter)

    # Console (stream) handler.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    # Remember the logger for later calls with the same model_name.
    _loggers[model_name] = logger

    logger.info(
        f"Logger cho '{safe_model_name}' ƒë√£ ƒë∆∞·ª£c kh·ªüi t·∫°o. File log: {log_file_path}"
    )

    return logger


## Setup logger

In [None]:
logger = setup_logger(f"{cfg.MODEL_NAME}-training")
logger.info(f"Logger initialized for {cfg.MODEL_NAME}")

# Session banner, then one log line per plain Config attribute
# (dunder entries and callables are skipped).
banner = "=" * 60
logger.info(banner)
logger.info("üöÄ STARTING TRAINING SESSION")
logger.info(banner)
for key, value in vars(Config).items():
    if key.startswith("__") or callable(value):
        continue
    logger.info(f"{key}: {value}")
logger.info(banner)


2025-10-17 14:07:11 - [INFO] - Logger cho 'FacebookAI_xlm-roberta-large-training' ƒë√£ ƒë∆∞·ª£c kh·ªüi t·∫°o. File log: logs/FacebookAI_xlm-roberta-large-training/2025-10-17_14-07-11.log
2025-10-17 14:07:11 - [INFO] - Logger initialized for FacebookAI/xlm-roberta-large
2025-10-17 14:07:11 - [INFO] - üöÄ STARTING TRAINING SESSION
2025-10-17 14:07:11 - [INFO] - ROOT_DIR: /home/guest/Projects/CS221
2025-10-17 14:07:11 - [INFO] - DATA_DIR: /home/guest/Projects/CS221/data
2025-10-17 14:07:11 - [INFO] - TRAIN_FILE: /home/guest/Projects/CS221/data/vihallu-train.csv
2025-10-17 14:07:11 - [INFO] - TEST_FILE: /home/guest/Projects/CS221/data/vihallu-public-test.csv
2025-10-17 14:07:11 - [INFO] - SUBMISSION_DIR: /home/guest/Projects/CS221/submission
2025-10-17 14:07:11 - [INFO] - SUBMISSION_CSV: submit.csv
2025-10-17 14:07:11 - [INFO] - SUBMISSION_ZIP: submit.zip
2025-10-17 14:07:11 - [INFO] - MODEL_NAME: FacebookAI/xlm-roberta-large
2025-10-17 14:07:11 - [INFO] - MODEL_OUTPUT_DIR: /home/guest/Pr

# Hallucination Dataset

In [None]:
# Cell [6] - ĐÃ CẬP NHẬT
import torch
from torch.utils.data import Dataset


class HallucinationDataset(Dataset):
    """Dataset that tokenizes (premise, hypothesis) pairs on access.

    Args:
        premises: Indexable collection of premise strings.
        hypotheses: Indexable collection of hypothesis strings, aligned with
            ``premises``.
        labels: Indexable collection of labels; its length defines the
            dataset length.
        tokenizer: Callable tokenizer accepting ``(text, text_pair=..., ...)``
            and returning a mapping with ``"input_ids"`` and
            ``"attention_mask"``.
        max_len: Value passed to the tokenizer as ``max_length``.

    No padding is requested here; the DataLoader cell in this file pads each
    batch with ``DataCollatorWithPadding``.
    """

    def __init__(self, premises, hypotheses, labels, tokenizer, max_len):
        self.premises = premises
        self.hypotheses = hypotheses
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of samples (the length of ``labels``)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its ids, attention mask and label."""
        premise = self.premises[idx]
        hypothesis = self.hypotheses[idx]
        label = self.labels[idx]

        # Encode the two texts as a sequence pair. The tokenizer is called
        # directly because `encode_plus` is documented as deprecated in favour
        # of `__call__`.
        # NOTE(review): the original comment says only the premise is
        # shortened when the pair is too long; with truncation=True that
        # depends on the tokenizer's truncation strategy — confirm if the
        # hypothesis must never be cut.
        encoding = self.tokenizer(
            premise,
            text_pair=hypothesis,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            truncation=True,
            return_attention_mask=True,
        )

        return {
            "input_ids": encoding["input_ids"],
            "attention_mask": encoding["attention_mask"],
            "labels": label,
        }


## Prepare data

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd


def prepare_data(config, logger=None):
    """Load the training CSV, build model inputs, and split train/validation.

    Steps:
      1. Read ``config.TRAIN_FILE`` with pandas.
      2. Build ``premise`` (question + context) and ``hypothesis`` (response)
         text columns from the ``prompt``, ``context`` and ``response`` columns.
      3. Map the ``label`` column through ``config.LABEL_MAP`` into ``label_id``.
         Rows whose label is not in the map are dropped, and the number dropped
         is reported instead of being discarded silently.
      4. Make a stratified split using ``config.VALIDATION_SPLIT_SIZE`` and
         ``config.RANDOM_STATE``.
      5. Write both splits to ``<config.DATA_DIR>/processed`` as
         ``train_split.csv`` and ``validation_split.csv``.

    Args:
        config: Object exposing TRAIN_FILE, LABEL_MAP, VALIDATION_SPLIT_SIZE,
            RANDOM_STATE and DATA_DIR.
        logger: Optional logger; when given it receives the split sizes and
            the dropped-row warning (otherwise the warning is printed).

    Returns:
        tuple: ``(train_df, val_df)`` pandas DataFrames.
    """
    df = pd.read_csv(config.TRAIN_FILE)
    print(f"‚úÖ ƒê·ªçc th√†nh c√¥ng {len(df)} m·∫´u t·ª´ file ƒë√£ x·ª≠ l√Ω: {config.TRAIN_FILE}")

    # Build the premise and hypothesis columns from the question/context/response.
    df["premise"] = (
        "C√¢u h·ªèi: "
        + df["prompt"].astype(str)
        + " Ng·ªØ c·∫£nh: "
        + df["context"].astype(str)
    )
    df["hypothesis"] = df["response"].astype(str)

    # Labels absent from LABEL_MAP become NaN after the mapping.
    df["label_id"] = df["label"].map(config.LABEL_MAP)
    unmapped = df["label_id"].isna()
    if unmapped.any():
        unknown_labels = sorted(df.loc[unmapped, "label"].astype(str).unique())
        drop_message = (
            f"Dropping {int(unmapped.sum())} row(s) whose label is not in "
            f"LABEL_MAP: {unknown_labels}"
        )
        if logger:
            logger.warning(drop_message)
        else:
            print(f"WARNING: {drop_message}")
    df.dropna(subset=["label_id"], inplace=True)
    df["label_id"] = df["label_id"].astype(int)

    train_df, val_df = train_test_split(
        df,
        test_size=config.VALIDATION_SPLIT_SIZE,
        random_state=config.RANDOM_STATE,
        stratify=df["label_id"],
    )

    if logger:
        logger.info(
            f"Chia d·ªØ li·ªáu: {len(train_df)} m·∫´u train, {len(val_df)} m·∫´u validation."
        )

    # --- Persist the splits under <DATA_DIR>/processed ---
    # Create the 'processed' directory inside the data directory if it is missing.
    processed_data_dir = os.path.join(config.DATA_DIR, "processed")
    os.makedirs(processed_data_dir, exist_ok=True)

    train_output_path = os.path.join(processed_data_dir, "train_split.csv")
    val_output_path = os.path.join(processed_data_dir, "validation_split.csv")

    train_df.to_csv(train_output_path, index=False, encoding="utf-8-sig")
    val_df.to_csv(val_output_path, index=False, encoding="utf-8-sig")

    print(f"‚úÖ ƒê√£ l∆∞u t·∫≠p train v√†o: {train_output_path}")
    print(f"‚úÖ ƒê√£ l∆∞u t·∫≠p validation v√†o: {val_output_path}")

    return train_df, val_df


# Model

In [None]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
import torch.nn as nn


def get_model_and_tokenizer(config):
    """Load the pre-trained sequence-classification model and its tokenizer.

    The checkpoint is ``config.MODEL_NAME`` and the classification head is
    sized to ``len(config.LABEL_MAP)``. When ``config`` defines
    ``CLASSIFIER_DROPOUT``, that value is written to the dropout fields present
    on ``model.config`` and assigned as ``p`` to every ``nn.Dropout`` module.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    print(f"ƒêang t·∫£i model: {config.MODEL_NAME}")
    tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)

    # NOTE(review): the original comment says config/model/tokenizer are all
    # loaded with trust_remote_code=True, but only this config load passes it.
    hf_config = AutoConfig.from_pretrained(config.MODEL_NAME, trust_remote_code=True)
    print(f"Model config: {hf_config}")

    model = AutoModelForSequenceClassification.from_pretrained(
        config.MODEL_NAME, num_labels=len(config.LABEL_MAP)
    )

    # Nothing more to do when no dropout override is configured.
    if not hasattr(config, "CLASSIFIER_DROPOUT"):
        return model, tokenizer

    dropout_p = config.CLASSIFIER_DROPOUT

    # Mirror the override into whichever dropout fields the model config has.
    for field_name in ("classifier_dropout", "hidden_dropout_prob"):
        if hasattr(model.config, field_name):
            setattr(model.config, field_name, dropout_p)

    # The attention dropout config value is clamped to the range [0.1, 0.15].
    if hasattr(model.config, "attention_probs_dropout_prob"):
        model.config.attention_probs_dropout_prob = min(0.15, max(0.1, dropout_p))

    # Apply the override to every dropout module that already exists.
    for module in model.modules():
        if isinstance(module, nn.Dropout):
            module.p = dropout_p

    return model, tokenizer


# Training

In [None]:
import os
import math
import torch
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from tqdm.auto import tqdm
from torch.optim import AdamW
from huggingface_hub import login
from transformers import get_scheduler
from torch.utils.data import DataLoader
from tqdm.contrib.logging import logging_redirect_tqdm
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, accuracy_score, classification_report
from functools import partial


## train one epoch function

In [None]:
def train_one_epoch(
    model,
    data_loader,
    loss_fn,
    optimizer,
    scheduler,
    device,
    epoch=None,
    total_epochs=None,
    gradient_accumulation_steps=1,
):
    """Train the model for one epoch using gradient accumulation.

    Gradients are accumulated over ``gradient_accumulation_steps`` batches
    before each optimizer/scheduler step; an update is also forced on the last
    batch of the epoch. Gradients are clipped to a max norm of 1.0 before every
    update.

    Each batch loss is divided by the number of batches in its accumulation
    window. The final window of an epoch can be shorter than
    ``gradient_accumulation_steps``; it is divided by its real size so that the
    last update is an average too, rather than being scaled down.

    Args:
        model: Model called as ``model(input_ids, attention_mask=...)`` whose
            output exposes ``.logits``.
        data_loader: Sized iterable of batches with ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` entries.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar loss.
        optimizer: Optimizer stepped once per accumulation window.
        scheduler: LR scheduler stepped together with the optimizer.
        device: Device the batch tensors are moved to.
        epoch: Optional epoch number, used only for the progress-bar label.
        total_epochs: Optional total epoch count for the progress-bar label.
        gradient_accumulation_steps: Number of batches per optimizer update.

    Returns:
        float: Mean unscaled batch loss over the epoch.

    Raises:
        ZeroDivisionError: If ``data_loader`` is empty.
    """
    model.train()
    total_loss = 0
    desc = "Train" if epoch is None else f"Epoch {epoch}/{total_epochs}"
    progress_bar = tqdm(
        data_loader, desc=desc, leave=False, dynamic_ncols=True, mininterval=0.5
    )

    optimizer.zero_grad()
    steps_in_epoch = len(data_loader)
    # Size and first step index of the trailing, shorter accumulation window.
    # When the epoch divides evenly, tail_size is 0 and no step reaches tail_start.
    tail_size = steps_in_epoch % gradient_accumulation_steps
    tail_start = steps_in_epoch - tail_size
    with logging_redirect_tqdm():  # keep logger output from breaking the bar
        for step, batch in enumerate(progress_bar):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits  # shape (batch_size, num_labels)

            loss = loss_fn(logits, labels)
            # Read the scalar once; each .item() call copies from the device.
            loss_value = loss.item()
            total_loss += loss_value

            window_size = (
                tail_size if step >= tail_start else gradient_accumulation_steps
            )
            scaled_loss = loss / window_size
            scaled_loss.backward()

            if (step + 1) % gradient_accumulation_steps == 0 or (
                step + 1
            ) == steps_in_epoch:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

            progress_bar.set_postfix({"loss": f"{loss_value:.4f}"})

    return total_loss / len(data_loader)


## Evaluate

In [None]:
def evaluate(model, data_loader, loss_fn, device):
    """Evaluate the model on a dataset without computing gradients.

    Args:
        model: Model called as ``model(input_ids, attention_mask=...)`` whose
            output exposes ``.logits``.
        data_loader: Sized iterable of batches with ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` entries.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar loss.
        device: Device the batch tensors are moved to.

    Returns:
        tuple: ``(all_labels, all_preds, avg_val_loss)`` — the collected
        labels, the argmax predictions, and the mean batch loss.
    """
    model.eval()
    gold_labels, predictions = [], []
    loss_sum = 0

    batches = tqdm(data_loader, desc="Evaluating", leave=False, dynamic_ncols=True)

    with torch.no_grad(), logging_redirect_tqdm():
        for batch in batches:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            logits = model(input_ids, attention_mask=attention_mask).logits

            # Accumulate the validation loss for this batch.
            loss_sum += loss_fn(logits, labels).item()

            # Predicted class = index of the largest logit.
            batch_preds = torch.argmax(logits, dim=-1)
            predictions.extend(batch_preds.cpu().numpy())
            gold_labels.extend(labels.cpu().numpy())

    # Mean loss per batch.
    mean_loss = loss_sum / len(data_loader)
    return gold_labels, predictions, mean_loss


# Main

In [None]:
# Load environment variables from envs/.env under the current working directory.
dotenv_path = os.path.join(os.getcwd(), "envs", ".env")
load_dotenv(dotenv_path)
print(f"dotenv_path: {dotenv_path}")


dotenv_path: /home/guest/Projects/CS221/envs/.env


In [11]:
# Read the Hugging Face token from the environment and log in when it is set.
hf_token = os.environ.get("HUGGING_FACE_TOKEN")

if not hf_token:
    # No token: keep going, but warn that some models may require a login.
    print(
        "WARNING: Kh√¥ng t√¨m th·∫•y HUGGING_FACE_TOKEN trong file .env. M·ªôt s·ªë model c√≥ th·ªÉ y√™u c·∫ßu ƒëƒÉng nh·∫≠p."
    )
else:
    print("INFO: T√¨m th·∫•y HUGGING_FACE_TOKEN. ƒêang ƒëƒÉng nh·∫≠p...")
    login(token=hf_token)
    print("INFO: ƒêƒÉng nh·∫≠p Hugging Face th√†nh c√¥ng.")


INFO: T√¨m th·∫•y HUGGING_FACE_TOKEN. ƒêang ƒëƒÉng nh·∫≠p...
INFO: ƒêƒÉng nh·∫≠p Hugging Face th√†nh c√¥ng.


## 1. Chuẩn bị dữ liệu

In [12]:
logger.info("B·∫Øt ƒë·∫ßu pipeline hu·∫•n luy·ªán.")

# 1. Prepare the data
logger.info("B∆∞·ªõc 1: Chu·∫©n b·ªã d·ªØ li·ªáu...")
train_df, val_df = prepare_data(cfg, logger=logger)
if train_df is None:
    logger.error("D·ªØ li·ªáu kh√¥ng th·ªÉ ƒë∆∞·ª£c chu·∫©n b·ªã. D·ª´ng ch∆∞∆°ng tr√¨nh.")
    # The message above announces that the program stops, so actually halt
    # here instead of letting later cells run without training data.
    raise RuntimeError("Training data could not be prepared.")


2025-10-17 14:07:14 - [INFO] - B·∫Øt ƒë·∫ßu pipeline hu·∫•n luy·ªán.
2025-10-17 14:07:14 - [INFO] - B∆∞·ªõc 1: Chu·∫©n b·ªã d·ªØ li·ªáu...
2025-10-17 14:07:14 - [INFO] - Chia d·ªØ li·ªáu: 5600 m·∫´u train, 1400 m·∫´u validation.


‚úÖ ƒê·ªçc th√†nh c√¥ng 7000 m·∫´u t·ª´ file ƒë√£ x·ª≠ l√Ω: /home/guest/Projects/CS221/data/vihallu-train.csv
‚úÖ ƒê√£ l∆∞u t·∫≠p train v√†o: /home/guest/Projects/CS221/data/processed/train_split.csv
‚úÖ ƒê√£ l∆∞u t·∫≠p validation v√†o: /home/guest/Projects/CS221/data/processed/validation_split.csv


## 2. Tải model và tokenizer

In [13]:
# Step 2: load the model and tokenizer selected by cfg.MODEL_NAME.
logger.info(f"B∆∞·ªõc 2: T·∫£i model '{cfg.MODEL_NAME}' v√† tokenizer...")
model, tokenizer = get_model_and_tokenizer(cfg)


2025-10-17 14:07:14 - [INFO] - B∆∞·ªõc 2: T·∫£i model 'FacebookAI/xlm-roberta-large' v√† tokenizer...


ƒêang t·∫£i model: FacebookAI/xlm-roberta-large
Model config: XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.57.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}



Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# %pip install torchinfo


In [15]:
from torchinfo import summary
import torch  # make sure torch is imported

logger.info("Ph√¢n t√≠ch ki·∫øn tr√∫c m√¥ h√¨nh b·∫±ng torchinfo...")

# --- Inspect the model with torchinfo ---
# Dummy batch of random token ids shaped (BATCH_SIZE, MAX_LENGTH), as in the config.
example_shape = (cfg.BATCH_SIZE, cfg.MAX_LENGTH)
input_ids_example = torch.randint(0, tokenizer.vocab_size, example_shape)

# 1. Build the summary with verbose=0 so torchinfo prints nothing itself and
#    the result can be captured; these are the columns included in the table.
summary_columns = ["input_size", "output_size", "num_params", "mult_adds"]
model_summary = summary(
    model,
    input_data={"input_ids": input_ids_example},
    verbose=0,
    col_names=summary_columns,
)

# 2. Render the summary object as text and send it to the logger.
logger.info(f"Ki·∫øn tr√∫c chi ti·∫øt c·ªßa m√¥ h√¨nh:\n{model_summary!s}")


# # (T√πy ch·ªçn) B·∫°n v·∫´n c√≥ th·ªÉ in ra m√†n h√¨nh n·∫øu mu·ªën xem ngay trong notebook
# print("In summary ra m√†n h√¨nh notebook:")
# print(model_summary)


2025-10-17 14:07:17 - [INFO] - Ph√¢n t√≠ch ki·∫øn tr√∫c m√¥ h√¨nh b·∫±ng torchinfo...
2025-10-17 14:07:20 - [INFO] - Ki·∫øn tr√∫c chi ti·∫øt c·ªßa m√¥ h√¨nh:
Layer (type:depth-idx)                                            Input Shape               Output Shape              Param #                   Mult-Adds
XLMRobertaForSequenceClassification                               --                        [4, 3]                    --                        --
‚îú‚îÄXLMRobertaModel: 1-1                                            [4, 512]                  [4, 512, 1024]            --                        --
‚îÇ    ‚îî‚îÄXLMRobertaEmbeddings: 2-1                                  --                        [4, 512, 1024]            --                        --
‚îÇ    ‚îÇ    ‚îî‚îÄEmbedding: 3-1                                        [4, 512]                  [4, 512, 1024]            256,002,048               1,024,008,192
‚îÇ    ‚îÇ    ‚îî‚îÄEmbedding: 3-2                                     

## 3. Tạo Dataset và DataLoader

In [16]:
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding

# --- BUILD DATASETS AND DATALOADERS ---
logger.info("B∆∞·ªõc 3: T·∫°o Dataset v√† DataLoader...")


def _make_dataset(frame):
    """Wrap one split DataFrame in a HallucinationDataset."""
    return HallucinationDataset(
        premises=frame["premise"].to_list(),
        hypotheses=frame["hypothesis"].to_list(),
        labels=frame["label_id"].to_list(),
        tokenizer=tokenizer,
        max_len=cfg.MAX_LENGTH,
    )


train_dataset = _make_dataset(train_df)
val_dataset = _make_dataset(val_df)

# The dataset returns unpadded encodings, so padding is applied per batch.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Both loaders share batch size and collate function; only training shuffles.
loader_kwargs = {"batch_size": cfg.BATCH_SIZE, "collate_fn": data_collator}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, **loader_kwargs)

logger.info("‚úÖ T·∫°o DataLoader th√†nh c√¥ng v·ªõi DataCollatorWithPadding chu·∫©n!")


2025-10-17 14:07:20 - [INFO] - B∆∞·ªõc 3: T·∫°o Dataset v√† DataLoader...
2025-10-17 14:07:20 - [INFO] - ‚úÖ T·∫°o DataLoader th√†nh c√¥ng v·ªõi DataCollatorWithPadding chu·∫©n!
2025-10-17 14:07:20 - [INFO] - ‚úÖ T·∫°o DataLoader th√†nh c√¥ng v·ªõi DataCollatorWithPadding chu·∫©n!


In [17]:
# Clamp the configured accumulation steps to at least 1, then derive the
# effective batch size (samples contributing to one optimizer update).
gradient_accumulation_steps = max(1, cfg.GRADIENT_ACCUMULATION_STEPS)
effective_batch_size = cfg.BATCH_SIZE * gradient_accumulation_steps
logger.info(
    "Gradient accumulation steps: %s | Effective batch size: %s",
    gradient_accumulation_steps,
    effective_batch_size,
)


2025-10-17 14:07:20 - [INFO] - Gradient accumulation steps: 4 | Effective batch size: 16


### Check dataset

In [18]:
# Sanity check: pull one batch from the training loader and print its shapes
# and labels.
print("\n--- Ki·ªÉm tra 1 batch d·ªØ li·ªáu ƒë·∫ßu v√†o ---")
sample_batch = next(iter(train_loader))

print("K√≠ch th∆∞·ªõc input_ids:", sample_batch["input_ids"].shape)
print("K√≠ch th∆∞·ªõc attention_mask:", sample_batch["attention_mask"].shape)
print("Nh√£n trong batch:", sample_batch["labels"])

# Decode the first sample (special tokens kept) to see what the model receives.
decoded_text = tokenizer.decode(sample_batch["input_ids"][0], skip_special_tokens=False)
print("\nM·ªôt m·∫´u ƒë√£ ƒë∆∞·ª£c token h√≥a v√† gi·∫£i m√£ l·∫°i:")
print(decoded_text)
print("------------------------------------------\n")
# --- END OF CHECK ---



--- Ki·ªÉm tra 1 batch d·ªØ li·ªáu ƒë·∫ßu v√†o ---
K√≠ch th∆∞·ªõc input_ids: torch.Size([4, 342])
K√≠ch th∆∞·ªõc attention_mask: torch.Size([4, 342])
Nh√£n trong batch: tensor([1, 1, 2, 0])

M·ªôt m·∫´u ƒë√£ ƒë∆∞·ª£c token h√≥a v√† gi·∫£i m√£ l·∫°i:
<s> C√¢u h·ªèi: S·ª± ƒë·ªëii ƒë√¢u cƒÉng th·∫≥ng xra gi·ªØax c√°c th·∫ø l·ª±c nao trong Mexicp? Ng·ªØ c·∫£nh: B√™n trong M√©xico, cƒÉng th·∫≥ng v·∫´n ti·∫øp di·ªÖn gi·ªØa phe li√™n bang ch·ªß nghƒ©a v√† phe trung ∆∞∆°ng t·∫≠p quy·ªÅn ch·ªß nghƒ©a. V√†o ƒë·∫ßu nƒÉm 1835, nh·ªØng ng∆∞·ªùi Texas th·∫≠n tr·ªçng ƒë√£ th√†nh l·∫≠p n√™n ·ª¶y ban T∆∞∆°ng ·ª©ng v√† An to√†n. T√¨nh tr·∫°ng n√°o ƒë·ªông b√πng ph√°t th√†nh xung ƒë·ªôt v≈© trang v√†o cu·ªëi nƒÉm 1835 t·∫°i tr·∫≠n Gonzales. S·ª± ki·ªán n√†y kh·ªüi ƒë·∫ßu C√°ch m·∫°ng Texas, v√† trong v√≤ng hai th√°ng sau ƒë√≥, ng∆∞·ªùi Texas ƒë√°nh b·∫°i t·∫•t c·∫£ c√°c ƒë·ªôi qu√¢n M√©xico t·∫°i khu v·ª±c. Ng∆∞·ªùi Texas b·∫ßu ra c√°c ƒë·∫°i di·ªán c·ªßa Consultation, th·ªÉ ch·∫ø n√†y l·∫≠p n√™n m·ªôt 

In [19]:
# Diagnostic: for the first few training samples, compare the raw text with
# what comes back after tokenizing and decoding, to spot truncation.
print("\n--- Ki·ªÉm tra chi ti·∫øt 5 m·∫´u ƒë·∫ßu ti√™n ƒë·ªÉ so s√°nh tr∆∞·ªõc v√† sau khi x·ª≠ l√Ω ---")

# Number of leading samples taken from the training DataFrame for comparison.
num_samples_to_check = 5
for i in range(num_samples_to_check):
    print(f"\n=============== M·∫™U {i} ===============")

    # 1. Raw text from the DataFrame
    original_premise = train_df["premise"].iloc[i]
    original_hypothesis = train_df["hypothesis"].iloc[i]
    # Join the two strings for display only.
    # NOTE(review): " [SEP] " is a visual separator here; the tokenizer
    # presumably inserts its own special tokens between the pair.
    original_combined_text = original_premise + " [SEP] " + original_hypothesis

    # 2. Processed sample from the Dataset
    processed_sample = train_dataset[i]
    processed_input_ids = processed_sample["input_ids"]

    # 3. Decode the processed input_ids back into text
    decoded_text = tokenizer.decode(processed_input_ids, skip_special_tokens=False)

    # 4. Compare and print the results. The "original" count encodes the pair
    #    without an explicit max_length; the processed count comes from the
    #    dataset, which passes max_length=cfg.MAX_LENGTH.
    original_token_count = len(tokenizer.encode(original_premise, original_hypothesis))
    processed_token_count = len(processed_input_ids)

    print(f"S·ªë token g·ªëc (∆∞·ªõc t√≠nh): {original_token_count}")
    print(
        f"S·ªë token sau khi x·ª≠ l√Ω (gi·ªõi h·∫°n b·ªüi max_len={cfg.MAX_LENGTH}): {processed_token_count}"
    )

    if original_token_count > cfg.MAX_LENGTH:
        print("‚ö†Ô∏è  C·∫¢NH B√ÅO: M·∫´u n√†y ƒë√£ b·ªã c·∫Øt b·ªõt (truncated)!")
    else:
        print("‚úÖ  OK: ƒê·ªô d√†i m·∫´u n·∫±m trong gi·ªõi h·∫°n, kh√¥ng b·ªã c·∫Øt.")

    print("\n--- VƒÉn b·∫£n G·ªêC  ---")
    print(original_combined_text)

    print("\n--- VƒÉn b·∫£n SAU KHI DECODE t·ª´ input_ids ---")
    print(decoded_text)

print("\n===========================================")
print(
    "Ki·ªÉm tra ho√†n t·∫•t. H√£y so s√°nh vƒÉn b·∫£n tr√™n ƒë·ªÉ xem c√≥ s·ª± kh√°c bi·ªát ·ªü cu·ªëi chu·ªói kh√¥ng."
)



--- Ki·ªÉm tra chi ti·∫øt 5 m·∫´u ƒë·∫ßu ti√™n ƒë·ªÉ so s√°nh tr∆∞·ªõc v√† sau khi x·ª≠ l√Ω ---

S·ªë token g·ªëc (∆∞·ªõc t√≠nh): 280
S·ªë token sau khi x·ª≠ l√Ω (gi·ªõi h·∫°n b·ªüi max_len=512): 280
‚úÖ  OK: ƒê·ªô d√†i m·∫´u n·∫±m trong gi·ªõi h·∫°n, kh√¥ng b·ªã c·∫Øt.

--- VƒÉn b·∫£n G·ªêC  ---
C√¢u h·ªèi: Chi·∫øn s·ª± nƒÉm 1950 ƒë√£ d√°nh b·∫°i qu√¢n Hoa K·ª≥ o chi√™n tr·∫°n n√†o? Ng·ªØ c·∫£nh: T·ª´ ng√†y 25 th√°ng 10 ƒë·∫øn ng√†y 5 th√°ng 11 (1950) l√† chi·∫øn d·ªãch ƒë·∫ßu ti√™n c·ªßa Trung Qu·ªëc. Qu√¢n Trung Qu·ªëc d√πng 2 s∆∞ ƒëo√†n c·ªßa qu√¢n ƒëo√†n 42 t·ªï ch·ª©c ph√≤ng ng·ª± ·ªü khu v·ª±c Ho√†ng Th·∫£o Lƒ©nh, Ph√≥ Chi·∫øn Lƒ©nh thu·ªôc m·∫∑t tr·∫≠n mi·ªÅn ƒë√¥ng, l·∫°i d√πng 3 qu√¢n ƒëo√†n v√† m·ªôt s∆∞ ƒëo√†n c·ªßa qu√¢n ƒëo√†n 42 (sau tƒÉng th√™m 2 qu√¢n ƒëo√†n) ph·∫£n k√≠ch ·ªü m·∫∑t tr·∫≠n mi·ªÅn T√¢y. Chi·∫øn d·ªãch n√†y ƒë√£ ƒë√°nh lui qu√¢n M·ªπ ƒë·∫øn ph√≠a nam s√¥ng Thanh Xuy√™n. Ng√†y 7 th√°ng 11, c√°c qu√¢n ƒëo√†n 20, 26, 27 thu·ªôc Binh ƒëo√†n 9 qu√¢n Ch√≠ nguy

## 4. Thiết lập Huấn luyện

In [20]:
import torch  # make sure torch is imported

logger.info("B∆∞·ªõc 4: Thi·∫øt l·∫≠p m√¥i tr∆∞·ªùng hu·∫•n luy·ªán v√† ki·∫øn tr√∫c model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Report which device (GPU/CPU) will be used ---
logger.info(f"S·ª≠ d·ª•ng thi·∫øt b·ªã: {device}")
if device.type != "cuda":
    logger.warning("‚ö†Ô∏è Kh√¥ng t√¨m th·∫•y GPU, s·ª≠ d·ª•ng CPU. Qu√° tr√¨nh training s·∫Ω r·∫•t ch·∫≠m.")
else:
    gpu_count = torch.cuda.device_count()
    gpu_name = torch.cuda.get_device_name(0)
    logger.info(f"‚úÖ T√¨m th·∫•y {gpu_count} GPU(s).")
    logger.info(f"‚úÖ ƒêang s·ª≠ d·ª•ng GPU: {gpu_name}")

# --- Log the model architecture ---
# Render the whole model as text so it can go through the logger.
model_architecture_string = str(model)
logger.info(f"Ki·∫øn tr√∫c c·ªßa m√¥ h√¨nh:\n{model_architecture_string}")

# Move the model to the selected device. This stays the last expression of the
# cell, so the notebook also echoes the returned model.
model.to(device)


2025-10-17 14:07:20 - [INFO] - B∆∞·ªõc 4: Thi·∫øt l·∫≠p m√¥i tr∆∞·ªùng hu·∫•n luy·ªán v√† ki·∫øn tr√∫c model...
2025-10-17 14:07:20 - [INFO] - S·ª≠ d·ª•ng thi·∫øt b·ªã: cuda
2025-10-17 14:07:20 - [INFO] - ‚úÖ T√¨m th·∫•y 1 GPU(s).
2025-10-17 14:07:20 - [INFO] - ‚úÖ ƒêang s·ª≠ d·ª•ng GPU: NVIDIA GeForce RTX 5070 Ti
2025-10-17 14:07:20 - [INFO] - Ki·∫øn tr√∫c c·ªßa m√¥ h√¨nh:
XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSdpaSelfAttention(
              (que

XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=1024, ou

In [21]:
# AdamW over all model parameters, configured from cfg (learning rate,
# weight decay, and numerical-stability epsilon).
optimizer = AdamW(
    model.parameters(),
    lr=cfg.LEARNING_RATE,
    weight_decay=cfg.WEIGHT_DECAY,
    eps=cfg.EPSILON,
)


In [22]:
from transformers import (
    get_scheduler,
    get_cosine_with_hard_restarts_schedule_with_warmup,
)

# Scheduler horizon: ceil-divide len(train_loader) by the accumulation factor to
# get the per-epoch step count, then multiply by the number of epochs.
num_update_steps_per_epoch = math.ceil(len(train_loader) / gradient_accumulation_steps)
num_training_steps = num_update_steps_per_epoch * cfg.EPOCHS
logger.info(
    "Scheduler will run for %s total steps (%s per epoch)",
    num_training_steps,
    num_update_steps_per_epoch,
)

# cfg.TOTAL_STEP_SCALE is interpreted in three ways:
#   (0, 1]  -> fraction of the total steps (at least one warmup step)
#   <= 0    -> warmup disabled
#   > 1     -> absolute number of warmup steps, capped at the total step count
if 0 < cfg.TOTAL_STEP_SCALE <= 1:
    warmup_steps = max(1, int(cfg.TOTAL_STEP_SCALE * num_training_steps))
elif cfg.TOTAL_STEP_SCALE <= 0:
    warmup_steps = 0
else:
    warmup_steps = min(int(cfg.TOTAL_STEP_SCALE), num_training_steps)

if cfg.SCHEDULER_TYPE != "cosine_with_restarts":
    # Every other scheduler type goes through the generic get_scheduler factory.
    logger.info(f"S·ª≠ d·ª•ng scheduler chung: {cfg.SCHEDULER_TYPE}")
    scheduler = get_scheduler(
        cfg.SCHEDULER_TYPE,
        optimizer=optimizer,
        num_training_steps=num_training_steps,
        num_warmup_steps=warmup_steps,
    )
else:
    # Special case: the dedicated helper is called directly so that
    # cfg.NUM_CYCLES can be passed through as `num_cycles`.
    logger.info(
        f"S·ª≠ d·ª•ng scheduler chuy√™n d·ª•ng: cosine_with_hard_restarts_schedule_with_warmup v·ªõi {cfg.NUM_CYCLES} chu k·ª≥."
    )
    scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
        optimizer=optimizer,
        num_cycles=cfg.NUM_CYCLES,
        num_training_steps=num_training_steps,
        num_warmup_steps=warmup_steps,
    )

logger.info("Warmup steps: %s", warmup_steps)


2025-10-17 14:07:21 - [INFO] - Scheduler will run for 3500 total steps (350 per epoch)
2025-10-17 14:07:21 - [INFO] - S·ª≠ d·ª•ng scheduler chung: constant_with_warmup
2025-10-17 14:07:21 - [INFO] - Warmup steps: 350
2025-10-17 14:07:21 - [INFO] - S·ª≠ d·ª•ng scheduler chung: constant_with_warmup
2025-10-17 14:07:21 - [INFO] - Warmup steps: 350


In [23]:
# Select the loss function. When cfg.CLASS_WEIGHTS is non-empty the weights are
# turned into a float tensor on `device` and combined with label smoothing;
# otherwise a default CrossEntropyLoss is used.
if not cfg.CLASS_WEIGHTS:
    # NOTE(review): cfg.LABEL_SMOOTHING is not passed in this branch — confirm
    # that dropping label smoothing together with the weights is intended.
    logger.info("S·ª≠ d·ª•ng CrossEntropyLoss th√¥ng th∆∞·ªùng (kh√¥ng c√≥ tr·ªçng s·ªë).")
    loss_fn = torch.nn.CrossEntropyLoss()
    loss_fn = loss_fn.to(device)
else:
    logger.info("S·ª≠ d·ª•ng Class Weights & Label smoothing cho h√†m loss.")
    class_weights_tensor = torch.tensor(cfg.CLASS_WEIGHTS, dtype=torch.float)
    class_weights_tensor = class_weights_tensor.to(device)
    loss_fn = torch.nn.CrossEntropyLoss(
        label_smoothing=cfg.LABEL_SMOOTHING,
        weight=class_weights_tensor,
    )
    loss_fn = loss_fn.to(device)


2025-10-17 14:07:21 - [INFO] - S·ª≠ d·ª•ng Class Weights & Label smoothing cho h√†m loss.


## 5. Vòng lặp Huấn luyện

In [None]:
# Fine-tuning loop: train for one epoch, evaluate on the validation loader,
# save the model/tokenizer whenever validation Macro-F1 improves, and stop early
# once cfg.PATIENCE_LIMIT consecutive epochs bring no improvement.
best_macro_f1 = 0.0
patience_counter = 0  # consecutive epochs without a Macro-F1 improvement

# Class names ordered by label id. Loop-invariant, so built once and reused for
# every classification report.
target_names = [cfg.ID2LABEL[i] for i in range(len(cfg.LABEL_MAP))]

for epoch in range(cfg.EPOCHS):
    logger.info(f"--- Epoch {epoch + 1}/{cfg.EPOCHS} ---")

    avg_train_loss = train_one_epoch(
        model,
        train_loader,
        loss_fn,
        optimizer,
        scheduler,
        device,
        epoch + 1,
        cfg.EPOCHS,
        gradient_accumulation_steps=gradient_accumulation_steps,
    )
    logger.info(f"Loss trung b√¨nh tr√™n t·∫≠p train: {avg_train_loss:.4f}")

    # Learning rate of the first parameter group, in scientific notation
    # (e.g. 8.00e-06).
    current_lr = optimizer.param_groups[0]["lr"]
    logger.info(f"Current Learning Rate: {current_lr:.2e}")

    # Evaluate on the validation set.
    logger.info("B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...")
    val_labels, val_preds, avg_val_loss = evaluate(model, val_loader, loss_fn, device)

    accuracy = accuracy_score(val_labels, val_preds)
    # zero_division=0 yields the same score as the default ("warn" also counts
    # an undefined metric as 0) but without an undefined-metric warning each
    # time a class receives no predictions, which happens in early epochs.
    macro_f1 = f1_score(val_labels, val_preds, average="macro", zero_division=0)

    logger.info(f"Validation Loss: {avg_val_loss:.4f}")
    logger.info(f"Validation Accuracy: {accuracy:.4f}")
    logger.info(f"Validation Macro-F1: {macro_f1:.4f}")

    # Detailed per-class report, logged as one multi-line message.
    report = classification_report(
        val_labels,
        val_preds,
        target_names=target_names,
        digits=4,
        zero_division=0,
    )
    logger.info(f"Classification Report tr√™n t·∫≠p validation:\n{report}")

    # Keep only the best model according to validation Macro-F1.
    if macro_f1 > best_macro_f1:
        best_macro_f1 = macro_f1
        patience_counter = 0  # improvement seen: reset the early-stopping counter

        logger.info(
            f"üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '{cfg.MODEL_OUTPUT_DIR}'..."
        )
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(cfg.MODEL_OUTPUT_DIR, exist_ok=True)

        model.save_pretrained(cfg.MODEL_OUTPUT_DIR)
        tokenizer.save_pretrained(cfg.MODEL_OUTPUT_DIR)
        logger.info("L∆∞u model th√†nh c√¥ng.")
    else:
        patience_counter += 1
        logger.warning(
            f"Macro-F1 kh√¥ng c·∫£i thi·ªán. Patience: {patience_counter}/{cfg.PATIENCE_LIMIT}"
        )
        if patience_counter >= cfg.PATIENCE_LIMIT:
            logger.info("Early stopping! D·ª´ng hu·∫•n luy·ªán.")
            break


2025-10-17 14:07:21 - [INFO] - --- Epoch 1/10 ---


Epoch 1/10:   0%|          | 0/1400 [00:00<?, ?it/s]

2025-10-17 14:11:45 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 1.1033
2025-10-17 14:11:45 - [INFO] - Current Learning Rate: 8.00e-06
2025-10-17 14:11:45 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/350 [00:00<?, ?it/s]

2025-10-17 14:12:06 - [INFO] - Validation Loss: 1.0034
2025-10-17 14:12:06 - [INFO] - Validation Accuracy: 0.3943
2025-10-17 14:12:06 - [INFO] - Validation Macro-F1: 0.2649
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
2025-10-17 14:12:06 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

          no     0.8625    0.1537    0.2609       449
   extrinsic     0.0000    0.0000    0.0000       461
   intrinsic     0.3659    0.9857    0.5337       490

    accuracy                         0.3943      1400
   macro avg     0.4095    0.3798    0.2649      1400
weighted avg     0.4047    0.3943    0.2705      1400

2025-10-17 14:12:06 - [INFO] - üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '/home/guest/Projects/CS221/models/xlm-robe

Epoch 2/10:   0%|          | 0/1400 [00:00<?, ?it/s]

2025-10-17 14:16:22 - [INFO] - Loss trung b√¨nh tr√™n t·∫≠p train: 0.8490
2025-10-17 14:16:22 - [INFO] - Current Learning Rate: 8.00e-06
2025-10-17 14:16:22 - [INFO] - B·∫Øt ƒë·∫ßu ƒë√°nh gi√° tr√™n t·∫≠p validation...


Evaluating:   0%|          | 0/350 [00:00<?, ?it/s]

2025-10-17 14:16:43 - [INFO] - Validation Loss: 0.8177
2025-10-17 14:16:43 - [INFO] - Validation Accuracy: 0.7186
2025-10-17 14:16:43 - [INFO] - Validation Macro-F1: 0.7214
2025-10-17 14:16:43 - [INFO] - Classification Report tr√™n t·∫≠p validation:
              precision    recall  f1-score   support

          no     0.8184    0.6526    0.7261       449
   extrinsic     0.7701    0.7484    0.7591       461
   intrinsic     0.6195    0.7510    0.6790       490

    accuracy                         0.7186      1400
   macro avg     0.7360    0.7173    0.7214      1400
weighted avg     0.7329    0.7186    0.7205      1400

2025-10-17 14:16:43 - [INFO] - üéâ Macro-F1 c·∫£i thi·ªán. ƒêang l∆∞u model t·ªët nh·∫•t v√†o '/home/guest/Projects/CS221/models/xlm-roberta-large-tuned'...
2025-10-17 14:16:45 - [INFO] - L∆∞u model th√†nh c√¥ng.
2025-10-17 14:16:45 - [INFO] - --- Epoch 3/10 ---


Epoch 3/10:   0%|          | 0/1400 [00:00<?, ?it/s]

In [None]:
# Announce the end of training, then report the best validation Macro-F1 seen
# and the directory the corresponding model was written to.
logger.info("üèÅ Qu√° tr√¨nh hu·∫•n luy·ªán ho√†n t·∫•t.")
best_model_summary = f"Model t·ªët nh·∫•t v·ªõi Macro-F1 = {best_macro_f1:.4f} ƒë√£ ƒë∆∞·ª£c l∆∞u t·∫°i '{cfg.MODEL_OUTPUT_DIR}'"
logger.info(best_model_summary)


2025-10-17 14:06:49 - [INFO] - üèÅ Qu√° tr√¨nh hu·∫•n luy·ªán ho√†n t·∫•t.
2025-10-17 14:06:49 - [INFO] - Model t·ªët nh·∫•t v·ªõi Macro-F1 = 0.7799 ƒë√£ ƒë∆∞·ª£c l∆∞u t·∫°i '/home/guest/Projects/CS221/models/xlm-roberta-large-tuned'
2025-10-17 14:06:49 - [INFO] - Model t·ªët nh·∫•t v·ªõi Macro-F1 = 0.7799 ƒë√£ ƒë∆∞·ª£c l∆∞u t·∫°i '/home/guest/Projects/CS221/models/xlm-roberta-large-tuned'


# Phân phối kết quả đúng/sai theo từng lớp

In [None]:
# Per-class breakdown of correct vs. incorrect validation predictions.
# NOTE(review): val_labels / val_preds appear to be whatever the most recent
# evaluate() call produced, i.e. the last epoch that ran rather than necessarily
# the saved best checkpoint — confirm before quoting these numbers.
val_label_names = [cfg.ID2LABEL[label_id] for label_id in val_labels]
pred_label_names = [cfg.ID2LABEL[pred_id] for pred_id in val_preds]
evaluation_df = pd.DataFrame(
    {
        "true_label": val_label_names,
        "predicted_label": pred_label_names,
    }
)
# Vectorised element-wise comparison instead of a row-by-row apply().
evaluation_df["status"] = (
    evaluation_df["true_label"]
    .eq(evaluation_df["predicted_label"])
    .map({True: "correct", False: "incorrect"})
)
distribution_table = (
    evaluation_df.groupby(["true_label", "status"])
    .size()
    .unstack(fill_value=0)
    # unstack() only creates a column for a status that actually occurs; force
    # both so the arithmetic below cannot raise KeyError when every prediction
    # is correct (or every prediction is wrong).
    .reindex(columns=["correct", "incorrect"], fill_value=0)
    .rename_axis(None, axis=1)
    .reset_index()
    .sort_values("true_label")
)

# 1. Total samples per true class.
distribution_table["total"] = (
    distribution_table["correct"] + distribution_table["incorrect"]
)

# 2. Share of correct predictions per class.
distribution_table["correct_rate"] = (
    distribution_table["correct"] / distribution_table["total"]
)

# 3. Share of incorrect predictions per class.
distribution_table["incorrect_rate"] = (
    distribution_table["incorrect"] / distribution_table["total"]
)

# Render both rates as percentage strings for readability.
distribution_table["correct_rate"] = distribution_table["correct_rate"].map(
    "{:.2%}".format
)
distribution_table["incorrect_rate"] = distribution_table["incorrect_rate"].map(
    "{:.2%}".format
)

# Write the table to the log as plain text.
logger.info(f"Ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:\n{distribution_table.to_string()}")

# In a notebook, display() renders the DataFrame as a formatted table.
print("B·∫£ng ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:")
display(distribution_table)


2025-10-17 14:06:52 - [INFO] - Ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:
  true_label  correct  incorrect  total correct_rate incorrect_rate
0  extrinsic      341        120    461       73.97%         26.03%
1  intrinsic      368        122    490       75.10%         24.90%
2         no      380         69    449       84.63%         15.37%


B·∫£ng ph√¢n ph·ªëi k·∫øt qu·∫£ tr√™n t·ª´ng l·ªõp:


Unnamed: 0,true_label,correct,incorrect,total,correct_rate,incorrect_rate
0,extrinsic,341,120,461,73.97%,26.03%
1,intrinsic,368,122,490,75.10%,24.90%
2,no,380,69,449,84.63%,15.37%
