In [None]:
!pip install evaluate -q

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m84.1/84.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
from tqdm import tqdm
import json, os, numpy as np
import re, random
from collections import Counter

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_LEN = 256
BATCH_SIZE = 8


In [34]:
class QAProcessorPhoBERT:
    def __init__(self, tokenizer, max_length=256):
        self.tok = tokenizer
        self.max_length = max_length

    def encode_example(self, context, question, answer):
        # 1. exact char span
        start_char = context.find(answer)
        if start_char == -1:
            return None
        end_char = start_char + len(answer)

        # 2. tokenize context but KEEP char alignment using regex split
        #    (PhoBERT's tokenize() breaks spacing, so we split manually into words)
        import re
        words = re.findall(r"\S+|\s+", context)

        tokens = []
        offsets = []
        char_idx = 0

        for w in words:
            if w.isspace():
                char_idx += len(w)
                continue

            sub_toks = self.tok.tokenize(w)
            for st in sub_toks:
                clean = st.lstrip("‚ñÅ")
                s = context.find(clean, char_idx)
                if s == -1:
                    # fallback to sequential char index
                    s = char_idx
                e = s + len(clean)
                tokens.append(st)
                offsets.append((s, e))
            char_idx = context.find(w, char_idx) + len(w)

        # map char span ‚Üí token span
        start_tok = end_tok = None
        for i, (s, e) in enumerate(offsets):
            if s <= start_char < e:
                start_tok = i
            if s < end_char <= e:
                end_tok = i

        if start_tok is None or end_tok is None:
            return None

        # encode pair
        enc = self.tok(
            question,
            context,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors=None,
            return_overflowing_tokens=False
        )

        enc.pop("token_type_ids", None)

        # question token count
        q_tokens = self.tok.tokenize(question)

        offset = 1 + len(q_tokens) + 1  # <s> Q </s>

        start_pos = start_tok + offset
        end_pos = end_tok + offset

        if end_pos >= self.max_length:
            return None

        enc["start_positions"] = start_pos
        enc["end_positions"] = end_pos
        return enc


In [29]:
class QADataset(Dataset):
    def __init__(self, data, tokenizer, max_length=256):
        self.processor = QAProcessorPhoBERT(tokenizer, max_length)
        self.features = []

        for ex in data:
            item = self.processor.encode_example(
                ex["context"], ex["question"], ex["answer"]
            )
            if item is not None:
                self.features.append(item)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        feat = self.features[idx]
        return {k: torch.tensor(v) for k, v in feat.items()}


In [32]:
import re

def normalize_text(s):
    """Chu·∫©n h√≥a ƒë·ªÉ lo·∫°i l·ªói kho·∫£ng tr·∫Øng, unicode, xu·ªëng d√≤ng."""
    if s is None:
        return ""

    s = s.replace("‚Äì", "-")      # normalize dash
    s = s.replace("‚Äî", "-")
    s = s.replace("‚Äú", "\"").replace("‚Äù", "\"")
    s = s.replace("‚Äô", "'")
    s = s.replace("‚Ä¶", "...")

    # b·ªè kho·∫£ng tr·∫Øng d∆∞
    s = re.sub(r"\s+", " ", s.strip())

    return s
from difflib import SequenceMatcher

def extract_best_span(context, answer):
    """N·∫øu answer kh√¥ng kh·ªõp EXACT, t√¨m ƒëo·∫°n t∆∞∆°ng t·ª± nh·∫•t trong context."""
    ctx = normalize_text(context)
    ans = normalize_text(answer)

    # 1) exact match
    if ans in ctx:
        return ans

    # 2) approx match (fuzzy)
    match = SequenceMatcher(None, ctx, ans).find_longest_match(
        0, len(ctx),
        0, len(ans)
    )

    span = ctx[match.a : match.a + match.size]

    # span h·ª£p l·ªá ph·∫£i c√≥ √≠t nh·∫•t 2 t·ª´
    if len(span.split()) >= 2:
        return span

    return None
import json

def clean_dataset_for_phobert(raw_data):
    cleaned = []
    dropped = []

    for ex in raw_data:
        ctx = normalize_text(ex["context"])
        ques = normalize_text(ex["question"])
        ans = normalize_text(ex["answer"])

        span = extract_best_span(ctx, ans)

        if span is None:
            dropped.append(ex)
        else:
            cleaned.append({
                "context": ctx,
                "question": ques,
                "answer": span
            })

    return cleaned, dropped
# Load file g·ªëc
with open("/content/drive/MyDrive/NLP/qa_dataset.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

# CLEAN
cleaned_data, dropped_data = clean_dataset_for_phobert(raw_data)

print("üëâ Original:", len(raw_data))
print("üëâ Cleaned:", len(cleaned_data))
print("üëâ Dropped:", len(dropped_data))
print(f"üî• Keep ratio: {len(cleaned_data)/len(raw_data):.4f}")


üëâ Original: 25061
üëâ Cleaned: 24649
üëâ Dropped: 412
üî• Keep ratio: 0.9836


In [35]:
# # Load d·ªØ li·ªáu g·ªëc
# with open("/content/drive/MyDrive/NLP/qa_dataset.json", "r", encoding="utf-8") as f:
#     data = json.load(f)

# Shuffle ƒë·ªÉ ph√¢n ph·ªëi ƒë·ªÅu (r·∫•t quan tr·ªçng cho QA)
random.shuffle(cleaned_data)

n = len(cleaned_data)
train_data = cleaned_data[:int(0.8*n)]
val_data   = cleaned_data[int(0.8*n):int(0.9*n)]
test_data  = cleaned_data[int(0.9*n):]

print("Train:", len(train_data))
print("Val:", len(val_data))
print("Test:", len(test_data))

tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")

# D√πng dataset c≈© c·ªßa b·∫°n
train_ds = QADataset(train_data, tokenizer, MAX_LEN)
val_ds   = QADataset(val_data, tokenizer, MAX_LEN)
test_ds  = QADataset(test_data, tokenizer, MAX_LEN)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
print("Train:", len(train_ds), "Val:", len(val_ds), "Test:", len(test_ds))


Train: 19719
Val: 2465
Test: 2465


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Train: 12275 Val: 1504 Test: 1548


In [37]:
import re

def normalize_text(s):
    return re.sub(r"\s+", " ", s.strip())

def check_dataset_errors(data, tokenizer, max_length=256):
    errors_L1 = []   # answer not in context
    errors_L2 = []   # tokenization mismatch
    errors_L3 = []   # truncated removed answer
    errors_L4 = []   # weird chars / format issues

    for idx, ex in enumerate(data):
        ctx = ex["context"]
        ques = ex["question"]
        ans = ex["answer"]

        # ============================
        # L1: answer kh√¥ng n·∫±m trong context g·ªëc
        # ============================
        if ans not in ctx:
            errors_L1.append(idx)
            continue

        # ============================
        # L2: Check tokenize ‚Üí reconstruct mismatch
        # ============================
        ctx_tokens = tokenizer.tokenize(ctx)

        reconstructed = ""
        for tk in ctx_tokens:
            if tk.startswith("‚ñÅ"):
                piece = tk[1:] if reconstructed == "" else " " + tk[1:]
            else:
                piece = tk
            reconstructed += piece

        if normalize_text(ans) not in normalize_text(reconstructed):
            errors_L2.append(idx)
            continue

        # ============================
        # L3: simulated truncate check
        # encode pair and see if end_pos is out of range
        # ============================
        enc = tokenizer(
            ques,
            ctx,
            truncation=True,
            padding="max_length",
            max_length=max_length,
            return_tensors=None
        )

        q_tokens = tokenizer.tokenize(ques)
        offset = 1 + len(q_tokens) + 1  # <s> Q </s>

        # t√¨m l·∫°i v·ªã tr√≠ answer trong reconstructed
        start_char = reconstructed.find(ans)
        end_char = start_char + len(ans)

        # map char ‚Üí reconstructed token index
        start_tok = end_tok = None
        curr = 0
        token_offsets = []
        for tk in ctx_tokens:
            if tk.startswith("‚ñÅ"):
                piece = tk[1:] if curr == 0 else " " + tk[1:]
            else:
                piece = tk
            s = curr
            curr += len(piece)
            e = curr
            token_offsets.append((s, e))

        for i,(s,e) in enumerate(token_offsets):
            if s <= start_char < e:
                start_tok = i
            if s < end_char <= e:
                end_tok = i

        if start_tok is None or end_tok is None:
            errors_L2.append(idx)
            continue

        start_pos = start_tok + offset
        end_pos = end_tok + offset

        if end_pos >= max_length:
            errors_L3.append(idx)
            continue

        # ============================
        # L4: detect weird characters
        # ============================
        if any(ord(c) > 60000 for c in ctx + ans):
            errors_L4.append(idx)

    return errors_L1, errors_L2, errors_L3, errors_L4


# üîç CH·∫†Y CHECK L·ªñI
L1, L2, L3, L4 = check_dataset_errors(cleaned_data, tokenizer, MAX_LEN)

print("======== DATASET ERROR REPORT ========")
print("L1. Answer not in context:", len(L1))
print("L2. Tokenization mismatch:", len(L2))
print("L3. Truncated (answer removed):", len(L3))
print("L4. Strange character errors:", len(L4))
print("TOTAL SAMPLES:", len(cleaned_data))
print("VALID SAMPLES:", len(cleaned_data) - (len(L1)+len(L2)+len(L3)+len(L4)))
print("======================================")


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


L1. Answer not in context: 0
L2. Tokenization mismatch: 24351
L3. Truncated (answer removed): 0
L4. Strange character errors: 0
TOTAL SAMPLES: 24649
VALID SAMPLES: 298


In [38]:
class PhoBERTForQA(nn.Module):
    def __init__(self, name="vinai/phobert-base"):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(name)
        H = self.encoder.config.hidden_size
        self.qa_head = nn.Linear(H, 2)

    def forward(self, input_ids, attention_mask, start_positions=None, end_positions=None):
        out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden = out.last_hidden_state

        logits = self.qa_head(last_hidden)  # [B, L, 2]
        start_logits, end_logits = logits.split(1, dim=-1)

        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        loss = None
        if start_positions is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(start_logits, start_positions) + loss_fn(end_logits, end_positions)

        return {"loss": loss, "start_logits": start_logits, "end_logits": end_logits}


In [39]:
class QATrainer:
    def __init__(self, model, train_loader, val_loader, lr=1e-5, epochs=5, weight_decay=0.01):
        self.model = model.to(DEVICE)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.epochs = epochs

        # Optimizer
        self.optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=lr,
            weight_decay=weight_decay
        )

        # Scheduler (warmup 10%)
        warmup_steps = int(0.1 * len(train_loader) * epochs)
        total_steps = len(train_loader) * epochs

        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=total_steps
        )

    # ============================
    #        TRAIN EPOCH
    # ============================
    def train_epoch(self, epoch_idx):
        self.model.train()
        total_loss = 0.0

        progress = tqdm(self.train_loader, desc=f"Train Epoch {epoch_idx+1}/{self.epochs}")

        for batch in progress:
            batch = {k: v.to(DEVICE) for k, v in batch.items()}

            self.optimizer.zero_grad()
            out = self.model(**batch)
            loss = out["loss"]

            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

            self.optimizer.step()
            self.scheduler.step()

            total_loss += loss.item()
            avg_loss = total_loss / (progress.n + 1)

            progress.set_postfix(loss=f"{avg_loss:.4f}")

        return total_loss / len(self.train_loader)

    # ============================
    #          VALIDATION
    # ============================
    @torch.no_grad()
    def val_epoch(self, epoch_idx):
        self.model.eval()
        total_loss = 0.0

        progress = tqdm(self.val_loader, desc=f"Val Epoch {epoch_idx+1}/{self.epochs}")

        for batch in progress:
            batch = {k: v.to(DEVICE) for k, v in batch.items()}
            out = self.model(**batch)
            total_loss += out["loss"].item()

        return total_loss / len(self.val_loader)

    # ============================
    #              FIT
    # ============================
    def fit(self):
        best_val = float("inf")
        best_state = None

        print("üöÄ B·∫Øt ƒë·∫ßu training PhoBERT QA...")

        for ep in range(self.epochs):
            train_loss = self.train_epoch(ep)
            val_loss = self.val_epoch(ep)

            print(f"\nEpoch {ep+1}/{self.epochs} | Train={train_loss:.4f} | Val={val_loss:.4f}")

            if val_loss < best_val:
                best_val = val_loss
                best_state = {k: v.cpu().clone() for k, v in self.model.state_dict().items()}

        # Load best checkpoint
        if best_state is not None:
            self.model.load_state_dict(best_state)
            print(f"\nüî• Loaded best checkpoint (val_loss={best_val:.4f})")

        return self.model


In [40]:
model = PhoBERTForQA()
trainer = QATrainer(model, train_loader, val_loader, lr=1e-5, epochs=5)

model = trainer.fit()


üöÄ B·∫Øt ƒë·∫ßu training PhoBERT QA...


Train Epoch 1/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1535/1535 [09:51<00:00,  2.59it/s, loss=5.8142]
Val Epoch 1/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 188/188 [00:20<00:00,  9.38it/s]



Epoch 1/5 | Train=5.8142 | Val=3.1910


Train Epoch 2/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1535/1535 [09:50<00:00,  2.60it/s, loss=2.9965]
Val Epoch 2/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 188/188 [00:19<00:00,  9.41it/s]



Epoch 2/5 | Train=2.9965 | Val=2.6745


Train Epoch 3/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1535/1535 [09:50<00:00,  2.60it/s, loss=2.3883]
Val Epoch 3/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 188/188 [00:19<00:00,  9.42it/s]



Epoch 3/5 | Train=2.3883 | Val=2.5735


Train Epoch 4/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1535/1535 [09:50<00:00,  2.60it/s, loss=2.0367]
Val Epoch 4/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 188/188 [00:19<00:00,  9.45it/s]



Epoch 4/5 | Train=2.0367 | Val=2.5920


Train Epoch 5/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1535/1535 [09:49<00:00,  2.60it/s, loss=1.8379]
Val Epoch 5/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 188/188 [00:19<00:00,  9.42it/s]



Epoch 5/5 | Train=1.8379 | Val=2.5970

üî• Loaded best checkpoint (val_loss=2.5735)


In [41]:
class ExtractiveQAModel:
    def __init__(self, model, tokenizer, max_length=256, top_k=8, max_answer_len=32):
        self.model = model
        self.tok = tokenizer
        self.max_length = max_length
        self.top_k = top_k
        self.max_answer_len = max_answer_len

    @torch.no_grad()
    def predict_span(self, ctx, ques):
        self.model.eval()

        enc = self.tok(
            ques, ctx,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )
        enc.pop("token_type_ids", None)
        enc = {k: v.to(DEVICE) for k, v in enc.items()}

        out = self.model(**enc)
        s_log = out["start_logits"][0]
        e_log = out["end_logits"][0]

        s_top = torch.topk(s_log, self.top_k)
        e_top = torch.topk(e_log, self.top_k)

        best = (-1e10, 0, 0)

        for i, s_idx in enumerate(s_top.indices):
            for j, e_idx in enumerate(e_top.indices):
                s = s_idx.item()
                e = e_idx.item()

                if e < s: continue
                if (e - s + 1) > self.max_answer_len: continue

                score = s_top.values[i] + e_top.values[j]
                if score > best[0]:
                    best = (score, s, e)

        _, s, e = best
        ids = enc["input_ids"][0][s:e+1]
        return self.tok.decode(ids, skip_special_tokens=True)


In [42]:
def normalize(text):
    text = text.lower()
    text = re.sub(r"[^\w\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

def em(pred, gold):
    return int(normalize(pred) == normalize(gold))

def f1(pred, gold):
    pt = normalize(pred).split()
    gt = normalize(gold).split()
    if len(pt) == 0 or len(gt) == 0: return 0
    c = Counter(pt) & Counter(gt)
    num_same = sum(c.values())
    if num_same == 0: return 0
    precision = num_same / len(pt)
    recall = num_same / len(gt)
    return 2 * precision * recall / (precision + recall)

def evaluate_extractive(model, data):
    EM, F1 = [], []
    for ex in tqdm(data, desc="Eval"):
        pred = model.predict_span(ex["context"], ex["question"])
        EM.append(em(pred, ex["answer"]))
        F1.append(f1(pred, ex["answer"]))
    return np.mean(EM), np.mean(F1)


In [43]:
extractive = ExtractiveQAModel(model, tokenizer)

EM, F1 = evaluate_extractive(extractive, test_data)
print("PHOBERT FINAL ‚Äî EM:", EM, "F1:", F1)


Eval:  29%|‚ñà‚ñà‚ñâ       | 710/2465 [00:13<00:32, 53.80it/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Eval:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2082/2465 [00:40<00:07, 52.46it/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Eval: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2465/2465 [00:47<00:00, 51.54it/s]

PHOBERT FINAL ‚Äî EM: 0.017849898580121704 F1: 0.551192840657221





In [None]:
import random

def test_random_samples(extractive_model, dataset, num_samples=5):
    print(f"\nüîç Testing {num_samples} random samples...\n")
    samples = random.sample(dataset, num_samples)

    for i, ex in enumerate(samples):
        ctx   = ex["context"]
        ques  = ex["question"]
        gold  = ex["answer"]

        pred = extractive_model.predict_span(ctx, ques)

        print(f"===== SAMPLE {i+1} =====")
        print("Context:", ctx)
        print("Q:", ques)
        print("Pred:", pred)
        print("Gold:", gold)
        print()
test_random_samples(extractive, test_data, num_samples=5)



üîç Testing 5 random samples...

===== SAMPLE 1 =====
Q: ƒêi·ªÅu g√¨ c√≥ th·ªÉ x·∫£y ra n·∫øu kh√¥ng c√≥ s·ªë c·ªïng c·ªë ƒë·ªãnh ƒë∆∞·ª£c s·ª≠ d·ª•ng cho BOOTP?
Pred: , c√≥ th·ªÉ m√°y kh√°ch kh√°c ƒëang l·∫Øng nghe tr√™n c√πng m·ªôt c·ªïng nh∆∞ng mong ƒë·ª£i ƒëi·ªÅu g√¨ ƒë√≥ kh√°@@
Gold: N·∫øu kh√¥ng c√≥ s·ªë c·ªïng c·ªë ƒë·ªãnh ƒë∆∞·ª£c s·ª≠ d·ª•ng, c√≥ th·ªÉ m√°y kh√°ch kh√°c ƒëang l·∫Øng nghe tr√™n c√πng m·ªôt c·ªïng nh∆∞ng mong ƒë·ª£i ƒëi·ªÅu g√¨ ƒë√≥ kh√°c.

===== SAMPLE 2 =====
Q: T·ªïng n·ª£ c·ªßa Hoa K·ª≥ v√†o ng√†y 20 th√°ng 1 nƒÉm 2009 l√† bao nhi√™u?
Pred: l√† 10,63 ngh√¨n t·ª∑ ƒë√¥
Gold: 10,63 ngh√¨n t·ª∑ ƒë√¥ la.

===== SAMPLE 3 =====
Q: C√° heo v·∫°ch c√≤n ƒë∆∞·ª£c g·ªçi b·∫±ng nh·ªØng t√™n th√¥ng d·ª•ng n√†o kh√°c?
Pred: C√° heo xanh ƒëu√¥i ƒë·ªè, c√° heo ƒëu√¥i ƒë·ªè, c√° heo v·∫°ch hay c√° nanh
Gold: √° heo xanh ƒëu√¥i ƒë·ªè, c√° heo ƒëu√¥i ƒë·ªè, c√° 

===== SAMPLE 4 =====
Q: Nh·ªØng tuy·∫øn ƒë∆∞·ªùng n√†o ƒëi qua huy·ªán H·∫° Lang?
Pred: tr√™n t·ªânh l·ªô 207 v√† 

In [81]:
save_path = "/content/drive/MyDrive/NLP/phobert-qa-final"
os.makedirs(save_path, exist_ok=True)

torch.save(model.state_dict(), f"{save_path}/pytorch_model.bin")
with open(f"{save_path}/config.json","w") as f:
    json.dump(model.encoder.config.to_dict(), f)

tokenizer.save_pretrained(save_path)

print("Saved to", save_path)


Saved to /content/drive/MyDrive/NLP/phobert-qa-final
