In [1]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m51.8/51.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m104.1/104.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-3.2.0 sacrebleu-2.5.1


In [2]:
import torch
import torch.nn as nn
import sentencepiece as spm
import os
import math
from tokenizers import Tokenizer
# --- CONFIGURATION ---
# 1. Hyperparameters (must match exactly the values used when the "Small" model was trained,
#    otherwise the checkpoint will not fit the rebuilt architecture)
D_MODEL = 256
N_HEAD = 4
D_FF = 1024
N_LAYERS = 4
DROP_PROB = 0.0 # Dropout is not needed at test time
MAX_LEN = 256
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Paths (adjust to wherever the files were uploaded)
MODEL_PATH = "/kaggle/input/transformer-training-en2vi/transformer_small_en2vi_v2.pt"
BPE_MODEL_PATH = "/kaggle/input/tokenizer-iwslt/iwslt_bpe.model"
TEST_SRC_RAW = "/kaggle/input/iwslt15-englishvietnamese/IWSLT'15 en-vi/tst2013.en.txt" # Raw text file
TEST_TRG_RAW = "/kaggle/input/iwslt15-englishvietnamese/IWSLT'15 en-vi/tst2013.vi.txt" # Raw text file

# 3. Load the tokenizer; the pad id and vocabulary size are read from the SentencePiece model
sp = spm.SentencePieceProcessor()
sp.load(BPE_MODEL_PATH)
PAD_IDX = sp.pad_id()
VOCAB_SIZE = sp.get_piece_size()

In [3]:
class TransformerEmbedding(nn.Module):
    """Token embedding scaled by sqrt(d_model), plus fixed sinusoidal positions, then dropout.

    The positional table is registered as the buffer ``pe`` with shape
    ``[1, max_len, d_model]`` and is sliced to the input length in ``forward``.
    """

    def __init__(self, vocab_size, d_model, max_len, drop_prob):
        super().__init__()
        self.tok_emb = nn.Embedding(vocab_size, d_model)
        self.d_model = d_model

        # One angle per (position, even-dimension) pair; sin fills the even
        # columns of the table and cos fills the odd columns.
        positions = torch.arange(0, max_len).unsqueeze(1).float()
        even_dims = torch.arange(0, d_model, 2).float()
        inv_freq = torch.exp(even_dims * (-math.log(10000.0) / d_model))
        angles = positions * inv_freq

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)
        self.register_buffer('pe', table.unsqueeze(0))

        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        """Embed token ids ``x`` (indexed as [batch, seq_len]) and add positions."""
        seq_len = x.size(1)
        scaled_tokens = self.tok_emb(x) * math.sqrt(self.d_model)
        return self.dropout(scaled_tokens + self.pe[:, :seq_len])

In [4]:
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention with separate Q/K/V and output projections."""

    def __init__(self, d_model, n_head):
        super().__init__()
        self.d_model = d_model
        self.n_head = n_head
        self.head_dim = d_model // n_head

        # d_model has to split evenly across the heads.
        assert self.head_dim * n_head == d_model, "d_model ph·∫£i chia h·∫øt cho n_head"

        # Input projections for queries, keys and values.
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)

        # Output projection applied after the heads are concatenated again.
        self.w_o = nn.Linear(d_model, d_model)

    def _split_heads(self, projected, batch_size):
        """Reshape [batch, seq, d_model] -> [batch, head, seq, head_dim]."""
        return projected.view(batch_size, -1, self.n_head, self.head_dim).transpose(1, 2)

    def forward(self, q, k, v, mask=None):
        """Attend from ``q`` over ``k``/``v``.

        q, k, v: [batch, seq_len, d_model].
        mask: optional tensor broadcastable to the score tensor
            [batch, head, q_len, k_len]; positions where ``mask == 0`` are
            filled with -1e9 before the softmax so they get ~zero weight.
        Returns a tensor of shape [batch, q_len, d_model].
        """
        batch_size = q.size(0)

        queries = self._split_heads(self.w_q(q), batch_size)
        keys = self._split_heads(self.w_k(k), batch_size)
        values = self._split_heads(self.w_v(v), batch_size)

        # Scaled dot product: (Q @ K^T) / sqrt(head_dim).
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / math.sqrt(self.head_dim)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)

        weights = torch.softmax(scores, dim=-1)
        context = torch.matmul(weights, values)

        # [batch, head, seq, head_dim] -> [batch, seq, d_model]
        merged = context.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        return self.w_o(merged)

In [5]:
class PositionwiseFeedForward(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Dropout -> Linear."""

    def __init__(self, d_model, d_ff, drop_prob=0.1):
        super().__init__()
        # d_ff is the hidden width (1024 for d_model=256 in this notebook's config).
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
        """Map x [batch, seq_len, d_model] to a tensor of the same shape."""
        hidden = self.dropout(self.relu(self.linear1(x)))
        return self.linear2(hidden)

In [6]:
class EncoderLayer(nn.Module):
    """One encoder block: self-attention and feed-forward, each followed by Add & Norm."""

    def __init__(self, d_model, n_head, d_ff, drop_prob=0.1):
        super().__init__()
        # Sub-layer 1: self-attention with its own norm and dropout.
        self.attention = MultiHeadAttention(d_model, n_head)
        self.norm1 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(drop_prob)

        # Sub-layer 2: position-wise feed-forward with its own norm and dropout.
        self.ffn = PositionwiseFeedForward(d_model, d_ff, drop_prob)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout2 = nn.Dropout(drop_prob)

    def forward(self, x, mask=None):
        """Run both sub-layers on ``x``; ``mask`` is forwarded to the attention."""
        # Self-attention (q = k = v = x), then residual connection + LayerNorm.
        attended = self.dropout1(self.attention(q=x, k=x, v=x, mask=mask))
        x = self.norm1(attended + x)

        # Feed-forward, then residual connection + LayerNorm.
        transformed = self.dropout2(self.ffn(x))
        return self.norm2(transformed + x)

In [7]:
class Encoder(nn.Module):
    """Embedding followed by a stack of ``n_layer`` EncoderLayer blocks."""

    def __init__(self, vocab_size, d_model, n_head, d_ff, n_layer, max_len, drop_prob, device):
        super().__init__()
        self.device = device

        # Token embedding + positional encoding.
        self.embedding = TransformerEmbedding(vocab_size, d_model, max_len, drop_prob)

        # The stacked encoder blocks, applied in order.
        self.layers = nn.ModuleList(
            [EncoderLayer(d_model, n_head, d_ff, drop_prob) for _ in range(n_layer)]
        )

    def forward(self, src, mask=None):
        """Encode token ids ``src`` ([batch, seq_len]); ``mask`` goes to every layer."""
        hidden = self.embedding(src)
        for encoder_layer in self.layers:
            hidden = encoder_layer(hidden, mask)
        return hidden

In [8]:
class DecoderLayer(nn.Module):
    """One decoder block: masked self-attention, cross-attention, feed-forward.

    Each of the three sub-layers is followed by dropout, a residual
    connection and a LayerNorm.
    """

    def __init__(self, d_model, n_head, d_ff, drop_prob=0.1):
        super().__init__()

        # 1. Self-attention over the target prefix (receives trg_mask).
        self.self_attention = MultiHeadAttention(d_model, n_head)
        self.norm1 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(drop_prob)

        # 2. Cross-attention: keys and values come from the encoder output.
        self.cross_attention = MultiHeadAttention(d_model, n_head)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout2 = nn.Dropout(drop_prob)

        # 3. Position-wise feed-forward.
        self.ffn = PositionwiseFeedForward(d_model, d_ff, drop_prob)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout3 = nn.Dropout(drop_prob)

    def forward(self, trg, enc_src, trg_mask, src_mask):
        """Run one decoder block.

        trg: decoder hidden states (the partially generated target).
        enc_src: encoder output for the source sentence.
        trg_mask: mask given to self-attention (hides future/pad positions).
        src_mask: mask given to cross-attention (hides source padding).
        """
        # Block 1: masked self-attention + Add & Norm.
        self_out = self.dropout1(self.self_attention(q=trg, k=trg, v=trg, mask=trg_mask))
        trg = self.norm1(self_out + trg)

        # Block 2: queries from the decoder, keys/values from the encoder.
        cross_out = self.dropout2(self.cross_attention(q=trg, k=enc_src, v=enc_src, mask=src_mask))
        trg = self.norm2(cross_out + trg)

        # Block 3: feed-forward + Add & Norm.
        ffn_out = self.dropout3(self.ffn(trg))
        return self.norm3(ffn_out + trg)

In [9]:
class Decoder(nn.Module):
    """Target embedding, a stack of DecoderLayer blocks, and a vocabulary projection."""

    def __init__(self, vocab_size, d_model, n_head, d_ff, n_layer, max_len, drop_prob, device):
        super().__init__()
        self.device = device

        # The decoder has its own embedding table (target side).
        self.embedding = TransformerEmbedding(vocab_size, d_model, max_len, drop_prob)

        self.layers = nn.ModuleList(
            [DecoderLayer(d_model, n_head, d_ff, drop_prob) for _ in range(n_layer)]
        )

        # Final projection from d_model to vocabulary logits.
        self.fc_out = nn.Linear(d_model, vocab_size)

    def forward(self, trg, enc_src, trg_mask, src_mask):
        """Return logits [batch, seq_len, vocab_size] for target ids ``trg`` [batch, seq_len]."""
        hidden = self.embedding(trg)
        for decoder_layer in self.layers:
            hidden = decoder_layer(hidden, enc_src, trg_mask, src_mask)
        return self.fc_out(hidden)

In [10]:
class Transformer(nn.Module):
    """Encoder-decoder wrapper that builds the attention masks from token ids.

    Mask convention: True/1 means "keep", False/0 means "hide".
    """

    def __init__(self, encoder, decoder, src_pad_idx, trg_pad_idx, device):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        """Padding mask for ``src`` [batch, src_len] -> bool [batch, 1, 1, src_len]."""
        # The two singleton axes let the mask broadcast over heads and query positions.
        keep = src != self.src_pad_idx
        return keep[:, None, None, :].to(self.device)

    def make_trg_mask(self, trg):
        """Combined padding + look-ahead mask -> bool [batch, 1, trg_len, trg_len]."""
        # 1. Padding mask: [batch, 1, 1, trg_len]
        not_pad = (trg != self.trg_pad_idx)[:, None, None, :]

        # 2. Look-ahead mask: lower-triangular, so position i only sees positions <= i.
        size = trg.shape[1]
        causal = torch.ones((size, size), device=self.device).tril().bool()

        # 3. A position is kept only if it is both non-pad and not in the future.
        return (not_pad & causal).to(self.device)

    def forward(self, src, trg):
        """Run the full model.

        src: [batch, src_len] source ids.
        trg: [batch, trg_len] decoder input ids (per the original note, the
            target with its last token removed).
        Returns whatever the decoder returns for these inputs.
        """
        src_mask, trg_mask = self.make_src_mask(src), self.make_trg_mask(trg)
        memory = self.encoder(src, src_mask)
        # The decoder also needs src_mask so cross-attention ignores source padding.
        return self.decoder(trg, memory, trg_mask, src_mask)

In [11]:
# 4. Load Model
# (The Encoder, Decoder, Transformer, MultiHeadAttention... classes must already be
#  defined by the cells above.)
print("üèóÔ∏è ƒêang d·ª±ng l·∫°i ki·∫øn tr√∫c model...")
enc = Encoder(VOCAB_SIZE, D_MODEL, N_HEAD, D_FF, N_LAYERS, MAX_LEN, DROP_PROB, device)
dec = Decoder(VOCAB_SIZE, D_MODEL, N_HEAD, D_FF, N_LAYERS, MAX_LEN, DROP_PROB, device)
# The same pad id is used for source and target (one shared SentencePiece model).
model = Transformer(enc, dec, PAD_IDX, PAD_IDX, device).to(device)

print(f"‚è≥ Loading weights t·ª´ {MODEL_PATH}...")
# The file is passed straight to load_state_dict, so it is expected to be a plain
# state_dict. weights_only=True restricts unpickling to tensors and basic containers
# instead of executing arbitrary pickled code from the checkpoint file.
model.load_state_dict(torch.load(MODEL_PATH, map_location=device, weights_only=True))
model.eval()
print("‚úÖ Model ƒë√£ s·∫µn s√†ng chi·∫øn ƒë·∫•u!")

üèóÔ∏è ƒêang d·ª±ng l·∫°i ki·∫øn tr√∫c model...
‚è≥ Loading weights t·ª´ /kaggle/input/transformer-training-en2vi/transformer_small_en2vi_v2.pt...
‚úÖ Model ƒë√£ s·∫µn s√†ng chi·∫øn ƒë·∫•u!


In [12]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import sentencepiece as spm
import html
import re
import unicodedata

# --- 1. TEXT CLEANING (same logic as the earlier preprocessing) ---
def clean_text_final(text):
    """Normalize one raw line; returns "" for any non-string input.

    Steps, in order: HTML-entity decoding, tag removal, URL removal,
    replacement of a few invisible characters, NFC normalization, removal of
    special bracket/quote characters, removal of (...) and [...] spans,
    whitespace collapsing, and lowercasing.
    """
    if not isinstance(text, str):
        return ""

    # 1. Decode HTML entities (&quot; -> ")
    cleaned = html.unescape(text)

    # 2-3. Drop HTML tags entirely; replace URLs with a single space.
    for pattern, replacement in ((r'<[^>]+>', ''), (r'https?://\S+|www\.\S+', ' ')):
        cleaned = re.sub(pattern, replacement, cleaned)

    # 4. Non-breaking space becomes a space; zero-width space and BOM are removed.
    for char, substitute in (('\xa0', ' '), ('\u200b', ''), ('\ufeff', '')):
        cleaned = cleaned.replace(char, substitute)

    # 5. Unicode normalization
    cleaned = unicodedata.normalize('NFC', cleaned)

    # 6-7. Remove special brackets/quotes, then bracketed metadata such as
    #      (Applause), (Laughter) or [..].
    for pattern in (r'[„Äê„Äë„Äå„Äç„Äé„Äè‚Äú‚Äù‚Äò‚Äô‚Äú‚Äù]', r'\([^)]*\)', r'\[[^\]]*\]'):
        cleaned = re.sub(pattern, '', cleaned)

    # 8-9. Collapse whitespace, then lowercase (the original note says the
    #      model was trained on lowercase text).
    return re.sub(r'\s+', ' ', cleaned).strip().lower()

# --- 2. DATASET WITH ON-THE-FLY PREPROCESSING ---
class RawTestDataset(Dataset):
    """Parallel raw-text test set that is cleaned and tokenized on access.

    Args:
        src_file: path to the UTF-8 source text file, one sentence per line.
        trg_file: path to the UTF-8 target text file, aligned line by line.
        sp_model: tokenizer object providing ``encode_as_ids``, ``bos_id``
            and ``eos_id``.

    Raises:
        ValueError: if the two files do not have the same number of lines.
    """

    def __init__(self, src_file, trg_file, sp_model):
        self.sp = sp_model

        # Read both raw files fully into memory.
        with open(src_file, 'r', encoding='utf-8') as f:
            self.src_lines = f.readlines()
        with open(trg_file, 'r', encoding='utf-8') as f:
            self.trg_lines = f.readlines()

        # __len__ is driven by the source file, so a mismatch would otherwise
        # surface as an IndexError mid-evaluation or silently drop targets.
        if len(self.src_lines) != len(self.trg_lines):
            raise ValueError(
                f"Source/target line count mismatch: {len(self.src_lines)} lines in "
                f"{src_file!r} vs {len(self.trg_lines)} lines in {trg_file!r}"
            )

        print(f"üìÇ ƒê√£ load {len(self.src_lines)} d√≤ng raw.")

    def __len__(self):
        return len(self.src_lines)

    def __getitem__(self, idx):
        """Return ``(src_tensor, trg_tensor)``: 1-D long tensors of BOS + ids + EOS."""
        # Clean first. The target is cleaned too so that references are
        # normalized the same way as the source before scoring.
        clean_src = clean_text_final(self.src_lines[idx])
        clean_trg = clean_text_final(self.trg_lines[idx])

        bos, eos = self.sp.bos_id(), self.sp.eos_id()
        src_ids = [bos] + self.sp.encode_as_ids(clean_src) + [eos]
        trg_ids = [bos] + self.sp.encode_as_ids(clean_trg) + [eos]

        return (
            torch.tensor(src_ids, dtype=torch.long),
            torch.tensor(trg_ids, dtype=torch.long),
        )

# --- 3. BUILD THE LOADER ---
# Collate function (unchanged). PAD_IDX is read from the tokenizer here so the
# collate function below has the padding id available as a module-level name.
PAD_IDX = sp.pad_id()
def collate_fn(batch):
    """Pad a list of (src, trg) tensor pairs into two batch-first tensors.

    Shorter sequences are right-padded with the module-level ``PAD_IDX``.
    Returns ``(src_pad, trg_pad)``.
    """
    sources, targets = zip(*batch)
    padded = [
        pad_sequence(group, padding_value=PAD_IDX, batch_first=True)
        for group in (sources, targets)
    ]
    return padded[0], padded[1]

# Instantiate the dataset and expose the DataLoader under the name `test_loader`.
test_dataset = RawTestDataset(TEST_SRC_RAW, TEST_TRG_RAW, sp)

# shuffle=False keeps the sentences in file order.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

print("‚úÖ Data Test ƒë√£ ƒë∆∞·ª£c Clean v√† Load th√†nh c√¥ng v√†o bi·∫øn 'test_loader'!")

üìÇ ƒê√£ load 1268 d√≤ng raw.
‚úÖ Data Test ƒë√£ ƒë∆∞·ª£c Clean v√† Load th√†nh c√¥ng v√†o bi·∫øn 'test_loader'!
‚úÖ Data Test ƒë√£ ƒë∆∞·ª£c Clean v√† Load th√†nh c√¥ng!


In [13]:
# import torch
# import torch.nn.functional as F
# import math
# import sacrebleu
# from comet import download_model, load_from_checkpoint
# from tqdm import tqdm
# import gc

# # ==============================================================================
# # PH·∫¶N 1: CORE BEAM SEARCH ALGORITHM (Thu·∫≠t to√°n c·ªët l√µi)
# # ==============================================================================
# def beam_search_decode(model, src, sp, device, max_len=128, beam_size=3, alpha=0.7):
#     """
#     H√†m Beam Search cho 1 c√¢u ƒë∆°n l·∫ª.
#     alpha=0.7: Length Penalty (c√†ng cao c√†ng ∆∞u ti√™n c√¢u d√†i).
#     """
#     model.eval()
    
#     # 1. Encode (Ch·ªâ ch·∫°y 1 l·∫ßn ƒë·ªÉ ti·∫øt ki·ªám th·ªùi gian)
#     src_mask = model.make_src_mask(src)
#     with torch.no_grad():
#         enc_src = model.encoder(src, src_mask)
    
#     # 2. Kh·ªüi t·∫°o Beam: [(score, [token_ids])]
#     beams = [(0.0, [sp.bos_id()])]
#     completed_sequences = []
    
#     for _ in range(max_len):
#         new_beams = []
        
#         for score, seq in beams:
#             # N·∫øu c√¢u ƒë√£ k·∫øt th√∫c
#             if seq[-1] == sp.eos_id():
#                 completed_sequences.append((score, seq))
#                 continue
                
#             # Decode b∆∞·ªõc ti·∫øp theo
#             trg_tensor = torch.LongTensor(seq).unsqueeze(0).to(device)
#             trg_mask = model.make_trg_mask(trg_tensor)
            
#             with torch.no_grad():
#                 output = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)
#                 prediction = output[:, -1, :] # L·∫•y token cu·ªëi
#                 log_probs = F.log_softmax(prediction, dim=1).squeeze(0)
                
#                 # L·∫•y Top-K ·ª©ng vi√™n
#                 topk_log_probs, topk_indices = torch.topk(log_probs, beam_size)
                
#             # M·ªü r·ªông nh√°nh
#             for i in range(beam_size):
#                 token_idx = topk_indices[i].item()
#                 token_log_prob = topk_log_probs[i].item()
#                 new_beams.append((score + token_log_prob, seq + [token_idx]))
        
#         # Gi·ªØ l·∫°i K nh√°nh t·ªët nh·∫•t
#         new_beams.sort(key=lambda x: x[0], reverse=True)
#         beams = new_beams[:beam_size]
        
#         # D·ª´ng s·ªõm n·∫øu t·∫•t c·∫£ c√°c nh√°nh ƒë·ªÅu ƒë√£ g·∫∑p EOS
#         if all([seq[-1] == sp.eos_id() for _, seq in beams]):
#             completed_sequences.extend(beams)
#             break
            
#     # 3. Ch·ªçn k·∫øt qu·∫£ t·ªët nh·∫•t (√Åp d·ª•ng Length Penalty)
#     if len(completed_sequences) == 0: completed_sequences = beams
        
#     final_candidates = []
#     for score, seq in completed_sequences:
#         length = len(seq)
#         # C√¥ng th·ª©c ph·∫°t ƒë·ªô d√†i: Score / (Length^alpha)
#         lp_score = score / (math.pow(length, alpha))
#         final_candidates.append((lp_score, seq))
        
#     final_candidates.sort(key=lambda x: x[0], reverse=True)
#     return final_candidates[0][1] # Tr·∫£ v·ªÅ sequence c√≥ ƒëi·ªÉm cao nh·∫•t

# # ==============================================================================
# # PH·∫¶N 2: H√ÄM SINH D·ªÆ LI·ªÜU D√ôNG BEAM SEARCH
# # ==============================================================================
# def generate_translations_beam(model, data_loader, sp, device, beam_size=3, max_len=128):
#     """
#     Ch·∫°y Beam Search tr√™n to√†n b·ªô t·∫≠p d·ªØ li·ªáu ƒë·ªÉ l·∫•y text.
#     """
#     model.eval()
#     sources = [] 
#     hypotheses = []
#     references = []
    
#     print(f"‚è≥ ƒêang ch·∫°y BEAM SEARCH (k={beam_size}) tr√™n {len(data_loader.dataset)} c√¢u...")
#     print("‚ö†Ô∏è C·∫£nh b√°o: Qu√° tr√¨nh n√†y s·∫Ω m·∫•t kho·∫£ng 30-45 ph√∫t. Vui l√≤ng ki√™n nh·∫´n...")
    
#     with torch.no_grad():
#         for src, trg in tqdm(data_loader, desc="Beam Decoding", mininterval=30):
#             src = src.to(device)
#             trg = trg.to(device)
            
#             # --- 1. L·∫•y Reference & Source (X·ª≠ l√Ω batch) ---
#             src_list = src.cpu().numpy().tolist()
#             trg_list = trg.cpu().numpy().tolist()
            
#             for s in src_list:
#                 clean_s = [x for x in s if x not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 sources.append(sp.decode_ids(clean_s))
                
#             for t in trg_list:
#                 clean_t = [x for x in t if x not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 references.append(sp.decode_ids(clean_t))
            
#             # --- 2. Beam Search t·ª´ng c√¢u trong Batch ---
#             batch_size = src.shape[0]
#             for i in range(batch_size):
#                 single_src = src[i].unsqueeze(0) # [1, seq_len]
                
#                 # G·ªçi h√†m Beam Search ·ªü tr√™n
#                 best_seq = beam_search_decode(model, single_src, sp, device, max_len, beam_size, alpha=0.7)
                
#                 # Clean chu·ªói k·∫øt qu·∫£ (B·ªè BOS/EOS)
#                 if best_seq[0] == sp.bos_id(): best_seq = best_seq[1:]
#                 if best_seq and best_seq[-1] == sp.eos_id(): best_seq = best_seq[:-1]
                
#                 hypotheses.append(sp.decode_ids(best_seq))
                
#     print(f"‚úÖ ƒê√£ tr√≠ch xu·∫•t xong {len(hypotheses)} c√¢u b·∫±ng Beam Search.")
#     return sources, hypotheses, references

# # ==============================================================================
# # PH·∫¶N 3: T√çNH ƒêI·ªÇM (Kh√¥ng thay ƒë·ªïi)
# # ==============================================================================
# def calculate_all_metrics(sources, hypotheses, references):
#     print("\nüöÄ ƒêang t√≠nh to√°n c√°c ch·ªâ s·ªë ƒë√°nh gi√° (BLEU, chrF, COMET)...")
    
#     # 1. BLEU
#     bleu = sacrebleu.corpus_bleu(hypotheses, [references])
#     # 2. chrF
#     chrf = sacrebleu.corpus_chrf(hypotheses, [references])
    
#     # 3. COMET
#     print("‚è≥ ƒêang t·∫£i model COMET...")
#     torch.cuda.empty_cache(); gc.collect()
    
#     comet_score = 0
#     try:
#         model_path = download_model("Unbabel/wmt22-comet-da")
#         model_comet = load_from_checkpoint(model_path)
#         data = [{"src": s, "mt": h, "ref": r} for s, h, r in zip(sources, hypotheses, references)]
        
#         print("‚è≥ ƒêang ch·∫•m ƒëi·ªÉm COMET...")
#         # batch_size=32 an to√†n cho VRAM 16GB, n·∫øu l·ªói gi·∫£m xu·ªëng 16 ho·∫∑c 8
#         output = model_comet.predict(data, batch_size=32, gpus=1) 
#         comet_score = output.system_score * 100
#     except Exception as e:
#         print(f"‚ö†Ô∏è L·ªói COMET: {e}")

#     print("\n" + "="*50)
#     print("üèÜ FINAL RESULTS (BEAM SEARCH k=3)")
#     print("-" * 50)
#     print(f"üîπ BLEU  : {bleu.score:.2f}")
#     print(f"üîπ chrF  : {chrf.score:.2f}")
#     print(f"üîπ COMET : {comet_score:.2f}")
#     print("="*50)

# # ==============================================================================
# # CH·∫†Y (MAIN EXECUTION)
# # ==============================================================================

# # 1. Ch·∫°y Beam Search ƒë·ªÉ l·∫•y d·ªØ li·ªáu (L√¢u nh·∫•t ·ªü b∆∞·ªõc n√†y)
# src_texts, hyp_texts, ref_texts = generate_translations_beam(
#     model, test_loader, sp, device, 
#     beam_size=1,  # B·∫°n c√≥ th·ªÉ tƒÉng l√™n 5 n·∫øu mu·ªën, nh∆∞ng s·∫Ω ch·∫≠m h∆°n
#     max_len=128   # Gi·ªØ nguy√™n 150 nh∆∞ b·∫°n ƒë√£ test l√∫c n√£y
# )

# # 2. T√≠nh ƒëi·ªÉm
# calculate_all_metrics(src_texts, hyp_texts, ref_texts)

In [14]:
import torch
import torch.nn.functional as F
import math
import sacrebleu
import nltk
from nltk.translate.meteor_score import meteor_score
from tqdm import tqdm
import gc

# Download the WordNet data used by NLTK's METEOR implementation
# (only needs to run once per environment).
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

# ==============================================================================
# PART 1: BEAM SEARCH DECODE (defaults kept at k=3, max_len=128)
# ==============================================================================
def beam_search_decode(model, src, sp, device, max_len=128, beam_size=3, alpha=0.7):
    """Beam-search decode a single source sentence.

    Args:
        model: object exposing ``eval()``, ``make_src_mask``, ``make_trg_mask``,
            ``encoder`` and ``decoder`` (the Transformer wrapper in this notebook).
        src: source token ids for ONE sentence (the caller passes
            ``src[i].unsqueeze(0)``, i.e. a leading batch dimension of 1).
        sp: tokenizer providing ``bos_id()`` and ``eos_id()``.
        device: device on which the target prefix tensors are created.
        max_len: maximum number of decoding steps (tokens generated after BOS).
        beam_size: number of hypotheses kept per step; must be >= 1. It is
            clamped to the vocabulary size when expanding a hypothesis.
        alpha: length-penalty exponent; the final ranking uses
            ``score / len(seq) ** alpha`` where ``len(seq)`` counts BOS and EOS.

    Returns:
        The best token-id list, starting with BOS and, if the hypothesis
        finished, ending with EOS.

    Raises:
        ValueError: if ``beam_size`` is smaller than 1.
    """
    if beam_size < 1:
        raise ValueError(f"beam_size must be >= 1, got {beam_size}")

    model.eval()
    bos_id, eos_id = sp.bos_id(), sp.eos_id()

    live = [(0.0, [bos_id])]   # unfinished hypotheses: (sum of log-probs, token ids)
    finished = []              # hypotheses that have emitted EOS

    with torch.no_grad():
        # The source is encoded once and reused for every decoding step.
        src_mask = model.make_src_mask(src)
        enc_src = model.encoder(src, src_mask)

        for _ in range(max_len):
            candidates = []
            for score, seq in live:
                trg_tensor = torch.tensor([seq], dtype=torch.long, device=device)
                trg_mask = model.make_trg_mask(trg_tensor)
                output = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)

                # Only the distribution at the last position predicts the next token.
                log_probs = F.log_softmax(output[:, -1, :], dim=1).squeeze(0)

                # torch.topk raises if k exceeds the vocabulary size.
                k = min(beam_size, log_probs.size(0))
                top_log_probs, top_ids = torch.topk(log_probs, k)
                for token_log_prob, token_id in zip(top_log_probs.tolist(), top_ids.tolist()):
                    candidates.append((score + token_log_prob, seq + [token_id]))

            # Keep the best `beam_size` expansions. Those ending in EOS are moved
            # to `finished` immediately, so a hypothesis that completes on the
            # very last step is not lost when the loop runs out of steps.
            candidates.sort(key=lambda cand: cand[0], reverse=True)
            live = []
            for candidate in candidates[:beam_size]:
                if candidate[1][-1] == eos_id:
                    finished.append(candidate)
                else:
                    live.append(candidate)

            if not live:
                break

    # Fall back to unfinished hypotheses only when nothing reached EOS.
    pool = finished if finished else live

    # Length-normalized ranking; max() keeps the earliest candidate on ties.
    best = max(pool, key=lambda cand: cand[0] / math.pow(len(cand[1]), alpha))
    return best[1]

# ==============================================================================
# PART 2: RUN BEAM SEARCH TO PRODUCE TEXT
# ==============================================================================
def generate_beam_text_only(model, data_loader, sp, device, beam_size=3, max_len=128):
    """Translate every sentence yielded by ``data_loader`` with beam search.

    Args:
        model: translation model handed to ``beam_search_decode``.
        data_loader: iterable yielding ``(src, trg)`` batches of token ids.
        sp: tokenizer providing ``pad_id``/``bos_id``/``eos_id``/``decode_ids``.
        device: device the source batch is moved to before decoding.
        beam_size: beam width passed to ``beam_search_decode``.
        max_len: maximum number of decoding steps per sentence.

    Returns:
        ``(hypotheses, references)``: two lists of decoded strings in loader
        order. The length penalty is fixed at ``alpha=0.7``.
    """
    model.eval()
    hypotheses = []
    references = []

    # Built once instead of once per token inside the filtering loop.
    special_ids = {sp.pad_id(), sp.bos_id(), sp.eos_id()}

    print(f"‚è≥ ƒêang ch·∫°y BEAM SEARCH (k={beam_size}, max_len={max_len})...")
    print("‚ö†Ô∏è C·∫£nh b√°o: S·∫Ω m·∫•t kho·∫£ng 30-45 ph√∫t. ƒê·ª´ng t·∫Øt tr√¨nh duy·ªát!")

    with torch.no_grad():
        for src, trg in tqdm(data_loader, desc="Beam Decoding", mininterval=30):
            # --- References ---
            # The target batch is only decoded back to text, so it is not
            # moved to `device`; ids are read directly from the tensor.
            for ref_ids in trg.tolist():
                kept = [tok for tok in ref_ids if tok not in special_ids]
                references.append(sp.decode_ids(kept))

            # --- Hypotheses: one beam search per sentence in the batch ---
            src = src.to(device)
            for i in range(src.shape[0]):
                single_src = src[i].unsqueeze(0)
                best_seq = beam_search_decode(model, single_src, sp, device, max_len, beam_size, alpha=0.7)

                # Strip the leading BOS and trailing EOS; both checks tolerate
                # an empty sequence.
                if best_seq and best_seq[0] == sp.bos_id():
                    best_seq = best_seq[1:]
                if best_seq and best_seq[-1] == sp.eos_id():
                    best_seq = best_seq[:-1]

                hypotheses.append(sp.decode_ids(best_seq))

    return hypotheses, references

# ==============================================================================
# PH·∫¶N 3: CH·ªà T√çNH TER V√Ä METEOR
# ==============================================================================
def calculate_ter_meteor(hypotheses, references, beam_size=3):
    """Print corpus-level TER and mean sentence-level METEOR for a set of translations.

    Args:
        hypotheses: List of system translations (strings).
        references: List of reference translations (strings), aligned with
            ``hypotheses`` by index.
        beam_size: Beam width shown in the report header. Defaults to 3, which
            reproduces the previously hard-coded label.

    Returns:
        None. The scores are only printed.

    Raises:
        ValueError: If the two lists differ in length or are empty. Without
            this check, ``zip`` would silently truncate the METEOR loop and an
            empty input would end in a ZeroDivisionError.
    """
    if len(hypotheses) != len(references):
        raise ValueError(
            f"hypotheses and references must have the same length "
            f"(got {len(hypotheses)} and {len(references)})"
        )
    if not hypotheses:
        raise ValueError("hypotheses and references must not be empty")

    print("\nüöÄ ƒêang t√≠nh to√°n TER v√† METEOR...")

    # 1. TER (Translation Edit Rate) -- lower is better.
    ter = sacrebleu.corpus_ter(hypotheses, [references])

    # 2. METEOR -- higher is better.
    print("‚è≥ ƒêang t√≠nh METEOR (NLTK)...")
    # meteor_score is given whitespace-split token lists (the original note
    # says NLTK requires pre-tokenized input) and a list of references.
    meteor_scores = [
        meteor_score([ref.split()], hyp.split())
        for hyp, ref in zip(hypotheses, references)
    ]

    # Mean sentence score, scaled to 0-100 for display.
    final_meteor = (sum(meteor_scores) / len(meteor_scores)) * 100

    print("\n" + "="*50)
    print(f"üìä K·∫æT QU·∫¢ B·ªî SUNG (BEAM SEARCH k={beam_size})")
    print("-" * 50)
    print(f"üîπ TER    : {ter.score:.2f}  (C√†ng TH·∫§P c√†ng t·ªët)")
    print(f"üîπ METEOR : {final_meteor:.2f}  (C√†ng CAO c√†ng t·ªët)")
    print("="*50)

# ==============================================================================
# MAIN RUN
# ==============================================================================

# 1. Ch·∫°y Beam Search l·∫•y text
hyp_txt, ref_txt = generate_beam_text_only(model, test_loader, sp, device, beam_size=3, max_len=128)

# 2. Score the decoded text (prints TER and METEOR)
calculate_ter_meteor(hypotheses=hyp_txt, references=ref_txt)

‚è≥ ƒêang ch·∫°y BEAM SEARCH (k=3, max_len=128)...
‚ö†Ô∏è C·∫£nh b√°o: S·∫Ω m·∫•t kho·∫£ng 30-45 ph√∫t. ƒê·ª´ng t·∫Øt tr√¨nh duy·ªát!


Beam Decoding: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [21:57<00:00, 32.94s/it]



üöÄ ƒêang t√≠nh to√°n TER v√† METEOR...
‚è≥ ƒêang t√≠nh METEOR (NLTK)...

üìä K·∫æT QU·∫¢ B·ªî SUNG (BEAM SEARCH k=3)
--------------------------------------------------
üîπ TER    : 51.70  (C√†ng TH·∫§P c√†ng t·ªët)
üîπ METEOR : 57.38  (C√†ng CAO c√†ng t·ªët)


In [15]:
# import torch
# import sacrebleu
# from comet import download_model, load_from_checkpoint
# from tqdm import tqdm
# import gc

# # ==============================================================================
# # PH·∫¶N 1: H√ÄM SINH D·ªÆ LI·ªÜU (Generate Source, Hypothesis, Reference)
# # ==============================================================================
# def generate_translations(model, data_loader, sp, device, max_len=128):
#     """
#     H√†m n√†y ch·∫°y model (Greedy) ƒë·ªÉ l·∫•y ra 3 list vƒÉn b·∫£n c·∫ßn thi·∫øt cho vi·ªác ch·∫•m ƒëi·ªÉm.
#     """
#     model.eval()
    
#     sources = []    # C√¢u g·ªëc (Input cho COMET)
#     hypotheses = [] # M√°y d·ªãch (MT)
#     references = [] # ƒê√°p √°n chu·∫©n (Ref)
    
#     print(f"‚è≥ ƒêang decode d·ªØ li·ªáu (Greedy Search) tr√™n {len(data_loader.dataset)} c√¢u...")
    
#     with torch.no_grad():
#         for src, trg in tqdm(data_loader, desc="Extracting", mininterval=30):
#             src = src.to(device)
#             trg = trg.to(device)
            
#             # --- 1. Decode ---
#             # Encode
#             src_mask = model.make_src_mask(src)
#             enc_src = model.encoder(src, src_mask)
            
#             # Greedy Decode Loop
#             batch_size = src.shape[0]
#             trg_indexes = torch.tensor([[sp.bos_id()]] * batch_size).to(device)
            
#             for i in range(max_len):
#                 trg_mask = model.make_trg_mask(trg_indexes)
#                 output = model.decoder(trg_indexes, enc_src, trg_mask, src_mask)
#                 pred_token = output.argmax(2)[:,-1].unsqueeze(1)
#                 trg_indexes = torch.cat((trg_indexes, pred_token), dim=1)
                
#             # --- 2. Convert to Text ---
#             src_list = src.cpu().numpy().tolist()
#             trg_pred_list = trg_indexes.cpu().numpy().tolist()
#             trg_real_list = trg.cpu().numpy().tolist()
            
#             for i in range(batch_size):
#                 # A. L·∫•y Source Text (C·∫ßn thi·∫øt cho COMET)
#                 src_ids = [t for t in src_list[i] if t not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 sources.append(sp.decode_ids(src_ids))
                
#                 # B. L·∫•y Hypothesis (M√°y d·ªãch)
#                 pred_ids = trg_pred_list[i][1:] # B·ªè BOS
#                 if sp.eos_id() in pred_ids:
#                     pred_ids = pred_ids[:pred_ids.index(sp.eos_id())]
#                 hypotheses.append(sp.decode_ids(pred_ids))
                
#                 # C. L·∫•y Reference (ƒê√°p √°n)
#                 real_ids = [t for t in trg_real_list[i] if t not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 references.append(sp.decode_ids(real_ids))
                
#     print(f"‚úÖ ƒê√£ tr√≠ch xu·∫•t xong {len(sources)} b·ªô d·ªØ li·ªáu.")
#     return sources, hypotheses, references

# # ==============================================================================
# # PH·∫¶N 2: H√ÄM T√çNH ƒêI·ªÇM (BLEU, chrF, COMET)
# # ==============================================================================
# def calculate_all_metrics(sources, hypotheses, references):
#     print("\nüöÄ ƒêang t√≠nh to√°n c√°c ch·ªâ s·ªë ƒë√°nh gi√°...")
    
#     # --- 1. BLEU Score ---
#     bleu = sacrebleu.corpus_bleu(hypotheses, [references])
    
#     # --- 2. chrF Score ---
#     chrf = sacrebleu.corpus_chrf(hypotheses, [references])
    
#     # --- 3. COMET Score ---
#     print("‚è≥ ƒêang t·∫£i model COMET (wmt22-comet-da)...")
    
#     # Clean GPU memory
#     torch.cuda.empty_cache()
#     gc.collect()
    
#     comet_score = 0
#     try:
#         # T·∫£i model (t·ª± ƒë·ªông cache)
#         model_path = download_model("Unbabel/wmt22-comet-da")
#         model = load_from_checkpoint(model_path)
        
#         # Chu·∫©n b·ªã d·ªØ li·ªáu: List of Dictionary
#         data = [
#             {"src": s, "mt": h, "ref": r} 
#             for s, h, r in zip(sources, hypotheses, references)
#         ]
        
#         # Predict (Batch size nh·ªè ƒë·ªÉ tr√°nh OOM)
#         print("‚è≥ ƒêang ch·∫°y model COMET ƒë·ªÉ ch·∫•m ƒëi·ªÉm...")
#         model_output = model.predict(data, batch_size=16, gpus=1)
#         comet_score = model_output.system_score * 100 # Scale l√™n 100 cho d·ªÖ nh√¨n
        
#     except Exception as e:
#         print(f"‚ö†Ô∏è L·ªói COMET: {e}")
#         print("üëâ C√≥ th·ªÉ do h·∫øt VRAM ho·∫∑c ch∆∞a b·∫≠t Internet.")

#     # --- IN B√ÅO C√ÅO ---
#     print("\n" + "="*45)
#     print("üìä B·∫¢NG T·ªîNG S·∫ÆP K·∫æT QU·∫¢ D·ªäCH (FINAL REPORT)")
#     print("-" * 45)
#     print(f"üîπ BLEU  : {bleu.score:.2f}  (Chu·∫©n ng·ªØ ph√°p/t·ª´ v·ª±ng)")
#     print(f"üîπ chrF  : {chrf.score:.2f}  (Chu·∫©n h√¨nh th√°i t·ª´/k√Ω t·ª±)")
#     print(f"üîπ COMET : {comet_score:.2f}  (Chu·∫©n ng·ªØ nghƒ©a - Quan tr·ªçng nh·∫•t)")
#     print("="*45)

# # ==============================================================================
# # CH·∫†Y TH√îI (MAIN EXECUTION)
# # ==============================================================================

# # 1. Sinh d·ªØ li·ªáu vƒÉn b·∫£n t·ª´ Model (D√πng Greedy cho nhanh)
# # N·∫øu b·∫°n mu·ªën t√≠nh cho Beam Search, b·∫°n ph·∫£i d√πng code Beam Search ƒë·ªÉ t·∫°o ra list hypotheses tr∆∞·ªõc
# src_texts, hyp_texts, ref_texts = generate_translations(model, test_loader, sp, device)

# # 2. T√≠nh t·∫•t c·∫£ ch·ªâ s·ªë
# calculate_all_metrics(src_texts, hyp_texts, ref_texts)

In [16]:
# # GREEDY SEARCH

# import sacrebleu
# from tqdm import tqdm
# import torch

# def calculate_sacrebleu(model, data_loader, sp, device, max_len=128):
#     model.eval()
    
#     # SacreBLEU c·∫ßn:
#     # 1. List c√°c c√¢u model d·ªãch (Hypotheses)
#     # 2. List c√°c c√¢u ƒë√°p √°n (References)
#     hypotheses = []
#     references = []
    
#     print("‚è≥ ƒêang d·ªãch v√† t√≠nh SacreBLEU (Ch·ªù ch√∫t nh√©)...")
    
#     with torch.no_grad():
#         for src, trg in tqdm(data_loader, desc="Decoding", mininterval=30):
#             src = src.to(device)
#             trg = trg.to(device)
            
#             # --- 1. Greedy Search (Ch·∫°y nhanh ƒë·ªÉ l·∫•y m·∫´u t√≠nh ƒëi·ªÉm) ---
#             # L∆∞u √Ω: K·∫øt qu·∫£ Greedy th∆∞·ªùng th·∫•p h∆°n Beam Search kho·∫£ng 1-2 ƒëi·ªÉm
            
#             # Encode
#             src_mask = model.make_src_mask(src)
#             enc_src = model.encoder(src, src_mask)
            
#             # Decode loop
#             batch_size = src.shape[0]
#             trg_indexes = torch.tensor([[sp.bos_id()]] * batch_size).to(device)
            
#             for i in range(max_len):
#                 trg_mask = model.make_trg_mask(trg_indexes)
#                 output = model.decoder(trg_indexes, enc_src, trg_mask, src_mask)
#                 pred_token = output.argmax(2)[:,-1].unsqueeze(1)
#                 trg_indexes = torch.cat((trg_indexes, pred_token), dim=1)
            
#             # --- 2. Convert ID -> Text ---
#             trg_pred_list = trg_indexes.cpu().numpy().tolist()
#             trg_real_list = trg.cpu().numpy().tolist()
            
#             for i in range(batch_size):
#                 # A. X·ª≠ l√Ω c√¢u Model d·ªãch
#                 pred_ids = trg_pred_list[i][1:] # B·ªè BOS
#                 if sp.eos_id() in pred_ids:
#                     pred_ids = pred_ids[:pred_ids.index(sp.eos_id())]
                
#                 # Quan tr·ªçng: decode_ids s·∫Ω n·ªëi l·∫°i th√†nh c√¢u ho√†n ch·ªânh
#                 pred_text = sp.decode_ids(pred_ids) 
#                 hypotheses.append(pred_text)
                
#                 # B. X·ª≠ l√Ω c√¢u ƒê√°p √°n (Reference)
#                 real_ids = trg_real_list[i]
#                 # L·ªçc b·ªè PAD, BOS, EOS ƒë·ªÉ l·∫•y n·ªôi dung g·ªëc
#                 real_ids = [t for t in real_ids if t not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 real_text = sp.decode_ids(real_ids)
#                 references.append(real_text)

#     # --- 3. T√≠nh ƒëi·ªÉm ---
#     # SacreBLEU nh·∫≠n v√†o: list_hypotheses, [list_references]
#     # L∆∞u √Ω references ph·∫£i n·∫±m trong 1 list n·ªØa (v√¨ 1 c√¢u c√≥ th·ªÉ c√≥ nhi·ªÅu ƒë√°p √°n tham kh·∫£o)
#     score = sacrebleu.corpus_bleu(hypotheses, [references])
    
#     return score.score

# # --- CH·∫†Y ---
# bleu_score = calculate_sacrebleu(model, test_loader, sp, device)

# print(f"\n" + "="*40)
# print(f"üåü SACREBLEU SCORE: {bleu_score:.2f}")
# print(f"="*40)

In [17]:
# import torch
# import math
# import sacrebleu
# from tqdm import tqdm
# import torch.nn.functional as F

# # --- 1. H√ÄM CORE: BEAM SEARCH DECODE (Ch·∫°y cho 1 c√¢u) ---
# def beam_search_decode(model, src, sp, device, max_len=128, beam_size=3, alpha=0.7):
#     """
#     Th·ª±c hi·ªán Beam Search cho m·ªôt c√¢u ngu·ªìn (Batch size = 1)
#     alpha: Length Penalty (0.6 - 0.7 th∆∞·ªùng d√πng). Alpha c√†ng l·ªõn c√†ng ∆∞u ti√™n c√¢u d√†i.
#     """
#     model.eval()
    
#     # 1. Encode (Ch·ªâ ch·∫°y 1 l·∫ßn)
#     src_mask = model.make_src_mask(src)
#     with torch.no_grad():
#         enc_src = model.encoder(src, src_mask)
    
#     # 2. Kh·ªüi t·∫°o Beam
#     # M·ªói ph·∫ßn t·ª≠ trong beam l√† 1 tuple: (score, list_token_ids)
#     # Score ban ƒë·∫ßu l√† 0, token b·∫Øt ƒë·∫ßu l√† BOS
#     beams = [(0.0, [sp.bos_id()])]
    
#     # Danh s√°ch c√°c c√¢u ƒë√£ ho√†n th√†nh (g·∫∑p EOS)
#     completed_sequences = []
    
#     for _ in range(max_len):
#         new_beams = []
        
#         for score, seq in beams:
#             # N·∫øu c√¢u n√†y ƒë√£ k·∫øt th√∫c ·ªü b∆∞·ªõc tr∆∞·ªõc (d√π ch∆∞a ƒë∆∞·ª£c ƒë∆∞a v√†o completed), b·ªè qua
#             if seq[-1] == sp.eos_id():
#                 completed_sequences.append((score, seq))
#                 continue
                
#             # Chu·∫©n b·ªã input cho Decoder
#             trg_tensor = torch.LongTensor(seq).unsqueeze(0).to(device) # [1, curr_len]
#             trg_mask = model.make_trg_mask(trg_tensor)
            
#             # Decode
#             with torch.no_grad():
#                 output = model.decoder(trg_tensor, enc_src, trg_mask, src_mask)
#                 # output: [1, curr_len, vocab_size]
                
#                 # L·∫•y d·ª± ƒëo√°n ·ªü b∆∞·ªõc cu·ªëi c√πng
#                 prediction = output[:, -1, :] # [1, vocab_size]
                
#                 # T√≠nh Log Softmax
#                 log_probs = F.log_softmax(prediction, dim=1).squeeze(0) # [vocab_size]
                
#                 # L·∫•y Top-K ·ª©ng vi√™n t·ªët nh·∫•t hi·ªán t·∫°i
#                 topk_log_probs, topk_indices = torch.topk(log_probs, beam_size)
                
#             # M·ªü r·ªông Beam
#             for i in range(beam_size):
#                 token_idx = topk_indices[i].item()
#                 token_log_prob = topk_log_probs[i].item()
                
#                 new_score = score + token_log_prob
#                 new_seq = seq + [token_idx]
                
#                 new_beams.append((new_score, new_seq))
        
#         # S·∫Øp x·∫øp v√† l·∫•y K beam t·ªët nh·∫•t cho b∆∞·ªõc ti·∫øp theo
#         # S·∫Øp x·∫øp gi·∫£m d·∫ßn theo Score
#         new_beams.sort(key=lambda x: x[0], reverse=True)
#         beams = new_beams[:beam_size]
        
#         # N·∫øu t·∫•t c·∫£ c√°c beam ƒë·ªÅu ƒë√£ g·∫∑p EOS th√¨ d·ª´ng s·ªõm
#         if all([seq[-1] == sp.eos_id() for _, seq in beams]):
#             completed_sequences.extend(beams)
#             break
            
#     # --- 3. Ch·ªçn k·∫øt qu·∫£ t·ªët nh·∫•t (Apply Length Penalty) ---
#     # N·∫øu kh√¥ng c√≥ c√¢u n√†o ho√†n th√†nh (tr∆∞·ªùng h·ª£p max_len), l·∫•y lu√¥n beams hi·ªán t·∫°i
#     if len(completed_sequences) == 0:
#         completed_sequences = beams
        
#     final_candidates = []
#     for score, seq in completed_sequences:
#         # C√¥ng th·ª©c Length Penalty: Score / (Length ^ alpha)
#         # V√¨ Score l√† s·ªë √¢m (log_prob), n√™n chia cho length s·∫Ω l√†m n√≥ "√≠t √¢m" h∆°n (l·ªõn h∆°n)
#         length = len(seq)
#         lp_score = score / (math.pow(length, alpha))
#         final_candidates.append((lp_score, seq))
        
#     # S·∫Øp x·∫øp l·∫°i l·∫ßn cu·ªëi
#     final_candidates.sort(key=lambda x: x[0], reverse=True)
    
#     # L·∫•y token ids c·ªßa ·ª©ng vi√™n s·ªë 1
#     best_seq = final_candidates[0][1]
    
#     return best_seq

# # --- 2. H√ÄM CH√çNH: T√çNH BLEU TR√äN TO√ÄN B·ªò FILE ---
# def calculate_bleu_beam_search(model, data_loader, sp, device, beam_size=3, max_len=128):
#     model.eval()
#     hypotheses = []
#     references = []
    
#     print(f"‚è≥ ƒêang ch·∫°y Beam Search (k={beam_size}, max_len={max_len}) tr√™n to√†n b·ªô t·∫≠p Test...")
#     print("‚ö†Ô∏è C·∫£nh b√°o: Beam Search ch·∫°y ch·∫≠m h∆°n Greedy kho·∫£ng 3-5 l·∫ßn (M·∫•t kho·∫£ng 30-45 ph√∫t)...")
    
#     with torch.no_grad():
#         # Mininterval=30 ƒë·ªÉ log ƒë·ª° b·ªã spam, 30s c·∫≠p nh·∫≠t 1 l·∫ßn
#         for src, trg in tqdm(data_loader, desc="Beam Decoding", mininterval=30):
#             src = src.to(device)
#             trg = trg.to(device)
            
#             # --- A. L·∫•y Reference (ƒê√°p √°n) ---
#             trg_list = trg.cpu().numpy().tolist()
#             for t in trg_list:
#                 # L·ªçc b·ªè token r√°c
#                 real_ids = [x for x in t if x not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 references.append(sp.decode_ids(real_ids))
            
#             # --- B. Beam Search t·ª´ng c√¢u ---
#             batch_size = src.shape[0]
#             for i in range(batch_size):
#                 # L·∫•y 1 c√¢u ra, th√™m dim ƒë·ªÉ th√†nh [1, seq_len]
#                 single_src = src[i].unsqueeze(0) 
                
#                 # G·ªçi h√†m Beam Search Core
#                 pred_ids = beam_search_decode(
#                     model, single_src, sp, device, 
#                     max_len=max_len, 
#                     beam_size=beam_size, 
#                     alpha=0.7 # Ph·∫°t ƒë·ªô d√†i (quan tr·ªçng ƒë·ªÉ BLEU cao)
#                 )
                
#                 # B·ªè token BOS ƒë·∫ßu ti√™n n·∫øu c√≥ (th∆∞·ªùng beam search b·∫Øt ƒë·∫ßu t·ª´ BOS)
#                 if pred_ids[0] == sp.bos_id():
#                     pred_ids = pred_ids[1:]
                
#                 # N·∫øu c√≥ EOS ·ªü cu·ªëi th√¨ b·ªè ƒëi (ƒë·ªÉ decode cho s·∫°ch)
#                 if pred_ids and pred_ids[-1] == sp.eos_id():
#                     pred_ids = pred_ids[:-1]
                
#                 pred_text = sp.decode_ids(pred_ids)
#                 hypotheses.append(pred_text)
                
#     # --- C. T√≠nh ƒëi·ªÉm ---
#     print("\n‚úÖ Decoding ho√†n t·∫•t! ƒêang t√≠nh BLEU...")
#     score = sacrebleu.corpus_bleu(hypotheses, [references])
#     return score.score

# # ==========================================
# # --- CH·∫†Y TH√îI ---
# # ==========================================
# # L∆∞u √Ω: Ch·∫Øc ch·∫Øn b·∫°n ƒë√£ load model ngon l√†nh c√†nh ƒë√†o
# final_beam_bleu = calculate_bleu_beam_search(model, test_loader, sp, device, beam_size=3, max_len=128)

# print(f"\n" + "="*40)
# print(f"üî• FINAL BEAM SEARCH BLEU: {final_beam_bleu:.2f}")
# print(f"="*40)

In [18]:
# pip install -q -U google-genai

In [19]:
# from google import genai

# # Read the API key from the environment -- never hardcode it (the key previously committed here must be revoked)
# client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

# response = client.models.generate_content(
#     model="gemini-2.5-flash", # Ho·∫∑c gemini-2.0-flash-exp
#     contents="Explain how AI works"
# )
# print(response.text)

In [20]:
# import torch
# from tqdm import tqdm

# def get_data_for_evaluation(model, data_loader, sp, device, max_len=50):
#     model.eval()
    
#     sources = []    # C√¢u g·ªëc
#     hypotheses = [] # M√°y d·ªãch
#     references = [] # ƒê√°p √°n chu·∫©n
    
#     print("‚è≥ ƒêang decode d·ªØ li·ªáu ƒë·ªÉ chu·∫©n b·ªã ch·∫•m ƒëi·ªÉm...")
    
#     with torch.no_grad():
#         for src, trg in tqdm(data_loader, desc="Extracting Text", mininterval=30):
#             src = src.to(device)
#             trg = trg.to(device)
            
#             # --- 1. Decode (Greedy Search) ---
#             src_mask = model.make_src_mask(src)
#             enc_src = model.encoder(src, src_mask)
            
#             batch_size = src.shape[0]
#             trg_indexes = torch.tensor([[sp.bos_id()]] * batch_size).to(device)
            
#             for i in range(max_len):
#                 trg_mask = model.make_trg_mask(trg_indexes)
#                 output = model.decoder(trg_indexes, enc_src, trg_mask, src_mask)
#                 pred_token = output.argmax(2)[:,-1].unsqueeze(1)
#                 trg_indexes = torch.cat((trg_indexes, pred_token), dim=1)
            
#             # --- 2. Convert ID -> Text ---
#             src_list = src.cpu().numpy().tolist()
#             trg_pred_list = trg_indexes.cpu().numpy().tolist()
#             trg_real_list = trg.cpu().numpy().tolist()
            
#             for i in range(batch_size):
#                 # A. Source (C√¢u g·ªëc) - C·∫ßn lo·∫°i b·ªè PAD/BOS/EOS
#                 src_ids = [t for t in src_list[i] if t not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 sources.append(sp.decode_ids(src_ids))

#                 # B. Hypothesis (M√°y d·ªãch)
#                 pred_ids = trg_pred_list[i][1:] # B·ªè BOS ƒë·∫ßu
#                 if sp.eos_id() in pred_ids:
#                     pred_ids = pred_ids[:pred_ids.index(sp.eos_id())]
#                 hypotheses.append(sp.decode_ids(pred_ids))
                
#                 # C. Reference (ƒê√°p √°n)
#                 real_ids = [t for t in trg_real_list[i] if t not in [sp.pad_id(), sp.bos_id(), sp.eos_id()]]
#                 references.append(sp.decode_ids(real_ids))

#     print(f"‚úÖ ƒê√£ tr√≠ch xu·∫•t xong {len(sources)} c√¢u.")
#     return sources, hypotheses, references

In [21]:
# import time
# from google import genai
# import re
# import random
# import json

# # API key: load it from the environment instead of hardcoding it (revoke the key that was committed here)
# MY_API_KEY = os.environ["GEMINI_API_KEY"]

# def calculate_score_batching(sources, hypotheses, references, api_key, total_samples=30, batch_size=5):
#     """
#     Chi·∫øn thu·∫≠t: G·ª≠i 5 c√¢u m·ªôt l√∫c ƒë·ªÉ ti·∫øt ki·ªám request.
#     30 c√¢u ch·ªâ t·ªën 6 request -> Kh√¥ng bao gi·ªù b·ªã ch·∫∑n.
#     """
#     client = genai.Client(api_key=api_key)
#     model_id = "gemini-2.5-flash"
    
#     # 1. Ch·ªçn m·∫´u ng·∫´u nhi√™n
#     N = len(sources)
#     actual_samples = min(total_samples, N)
#     indices = random.sample(range(N), actual_samples)
    
#     # Gom data theo index ƒë√£ ch·ªçn
#     selected_src = [sources[i] for i in indices]
#     selected_hyp = [hypotheses[i] for i in indices]
#     selected_ref = [references[i] for i in indices]
    
#     all_scores = []
    
#     print(f"\nüöÄ ƒêang ch·∫•m {actual_samples} c√¢u theo c∆° ch·∫ø BATCHING (Gom {batch_size} c√¢u/l·∫ßn)...")
#     print("-" * 65)
#     print(f"{'Batch':<6} | {'Status':<30} | {'Scores Received'}")
#     print("-" * 65)

#     # 2. V√≤ng l·∫∑p t·ª´ng Batch
#     for i in range(0, actual_samples, batch_size):
#         # C·∫Øt l√°t d·ªØ li·ªáu (Slicing)
#         batch_src = selected_src[i : i + batch_size]
#         batch_hyp = selected_hyp[i : i + batch_size]
#         batch_ref = selected_ref[i : i + batch_size]
        
#         # T·∫°o prompt gom nhi·ªÅu c√¢u
#         content_text = ""
#         for j in range(len(batch_src)):
#             content_text += f"""
#             C√¢u {j+1}:
#             - G·ªëc: "{batch_src[j]}"
#             - ƒê√°p √°n: "{batch_ref[j]}"
#             - M√°y d·ªãch: "{batch_hyp[j]}"
#             """
            
#         prompt = f"""
#         B·∫°n l√† gi√°m kh·∫£o ch·∫•m thi. H√£y ch·∫•m ƒëi·ªÉm {len(batch_src)} b·∫£n d·ªãch tr√™n thang 0-100.
        
#         D·ªØ li·ªáu c·∫ßn ch·∫•m:
#         {content_text}
        
#         Y√äU C·∫¶U QUAN TR·ªåNG:
#         - Ch·ªâ tr·∫£ v·ªÅ m·ªôt list Python ch·ª©a c√°c con s·ªë t∆∞∆°ng ·ª©ng.
#         - V√≠ d·ª• output mong mu·ªën: [85, 90, 70, 100, 60]
#         - Kh√¥ng gi·∫£i th√≠ch g√¨ th√™m.
#         """
        
#         try:
#             # G·ªçi API
#             response = client.models.generate_content(
#                 model=model_id, 
#                 contents=prompt
#             )
            
#             # X·ª≠ l√Ω k·∫øt qu·∫£ tr·∫£ v·ªÅ
#             text = response.text
#             # T√¨m m·∫£ng s·ªë trong text (VD: [80, 90...])
#             match = re.search(r'\[.*?\]', text)
#             if match:
#                 # Parse string th√†nh list
#                 scores = json.loads(match.group())
#                 # ƒê·∫£m b·∫£o l√† list s·ªë nguy√™n
#                 scores = [int(s) for s in scores]
#                 all_scores.extend(scores)
                
#                 print(f"{i//batch_size + 1:<6} | ‚úÖ ƒê√£ ch·∫•m xong {len(scores)} c√¢u       | {scores}")
#             else:
#                 # Fallback: N·∫øu model kh√¥ng tr·∫£ v·ªÅ list, d√πng regex t√¨m t·∫•t c·∫£ s·ªë
#                 nums = re.findall(r'\d+', text)
#                 nums = [int(n) for n in nums if int(n) <= 100][:len(batch_src)]
#                 all_scores.extend(nums)
#                 print(f"{i//batch_size + 1:<6} | ‚ö†Ô∏è Regex fallback ({len(nums)} c√¢u)   | {nums}")

#             # Ngh·ªâ nh·∫π 2s (R·∫•t an to√†n v√¨ 1 request x·ª≠ l√Ω ƒë∆∞·ª£c 5 c√¢u r·ªìi)
#             time.sleep(2)
            
#         except Exception as e:
#             print(f"Batch {i//batch_size + 1}: ‚ùå L·ªói - {e}")
#             time.sleep(5) # N·∫øu l·ªói th√¨ ngh·ªâ l√¢u ch√∫t

#     # 3. T·ªïng k·∫øt
#     if not all_scores: return 0
#     avg_score = sum(all_scores) / len(all_scores)
    
#     print("-" * 65)
#     print(f"‚úÖ ƒê√£ ch·∫•m t·ªïng c·ªông: {len(all_scores)}/{actual_samples} c√¢u")
#     return avg_score

# # --- CH·∫†Y ---
# if 'src_texts' in globals():
#     # Ch·∫•m 30 c√¢u, m·ªói l·∫ßn g·ª≠i 5 c√¢u => Ch·ªâ m·∫•t 6 request (API Free ch·ªãu t·ªët)
#     final_score = calculate_score_batching(src_texts, hyp_texts, ref_texts, MY_API_KEY, total_samples=30, batch_size=5)
#     print(f"\n" + "="*40)
#     print(f"üåü GEMINI BATCH SCORE: {final_score:.2f} / 100")
#     print(f"="*40)