In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/btl-nlp-cleandata/test_cleaned.jsonl
/kaggle/input/btl-nlp-cleandata/train_cleaned.jsonl
/kaggle/input/btl-nlp-cleandata/__results__.html
/kaggle/input/btl-nlp-cleandata/__notebook__.ipynb
/kaggle/input/btl-nlp-cleandata/__output__.json
/kaggle/input/btl-nlp-cleandata/custom.css
/kaggle/input/btl-nlp-cleandata/.virtual_documents/__notebook_source__.ipynb


In [2]:
from datasets import load_dataset
from datasets import DatasetDict


# Load data
data_files = {
    "train": "/kaggle/input/btl-nlp-cleandata/train_cleaned.jsonl",
    "test": "/kaggle/input/btl-nlp-cleandata/test_cleaned.jsonl"
}
dataset = load_dataset("json", data_files=data_files)

# Create train/validation split (90/10 or 95/5)
train_val_split = dataset['train'].train_test_split(
    test_size=0.05,  # 5% for validation
    seed=42,
    shuffle=True  # IMPORTANT: Shuffle the data
)

# Reassemble into final dataset structure
dataset = DatasetDict({
    'train': train_val_split['train'],
    'validation': train_val_split['test'],  # Note: this is validation, not test
    'test': dataset['test']
})

print("="*60)
print("Final dataset structure:")
print(dataset)
print(f"  Train: {len(dataset['train'])} pairs")
print(f"  Validation: {len(dataset['validation'])} pairs")
print(f"  Test: {len(dataset['test'])} pairs")
print("="*60)

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Final dataset structure:
DatasetDict({
    train: Dataset({
        features: ['en', 'vi'],
        num_rows: 305942
    })
    validation: Dataset({
        features: ['en', 'vi'],
        num_rows: 16103
    })
    test: Dataset({
        features: ['en', 'vi'],
        num_rows: 2793
    })
})
  Train: 305942 pairs
  Validation: 16103 pairs
  Test: 2793 pairs


In [3]:
import random

def inspect_dataset(dataset, num_samples=5):
    """Inspect random samples from dataset"""
    print("\n" + "="*60)
    print("DATASET INSPECTION")
    print("="*60)
    
    # Check for None/empty values
    for split in dataset.keys():
        print(f"\n{split.upper()} split:")
        print(f"  Total examples: {len(dataset[split])}")
        
        # Sample random examples
        indices = random.sample(range(len(dataset[split])), min(num_samples, len(dataset[split])))
        
        for idx in indices:
            example = dataset[split][idx]
            en = example['en']
            vi = example['vi']
            
            print(f"\n  Example {idx}:")
            print(f"    EN ({len(en.split())} words): {en[:100]}...")
            print(f"    VI ({len(vi.split())} words): {vi[:100]}...")
            
            # Sanity checks
            if not en or not vi:
                print("    ⚠️  WARNING: Empty field detected!")
            if len(en.split()) < 3 or len(vi.split()) < 3:
                print("    ⚠️  WARNING: Very short sentence!")

inspect_dataset(dataset)


DATASET INSPECTION

TRAIN split:
  Total examples: 305942

  Example 4464:
    EN (28 words): Background: Robot-assisted surgery was first employed by the end of 1980s, and has since become popu...
    VI (34 words): Đặt vấn đề: Phẫu thuật có hổ trợ robot lần đầu tiên được tiến hành tại Mỹ vào cuối thập niên 1980 và...

  Example 28853:
    EN (6 words): Plasma homocysteine levels in lupus erythematosus...
    VI (10 words): Nồng độ homocysteine huyết tương trên bệnh nhânlupus ban đỏ...

  Example 202115:
    EN (22 words): Effects of low dose chitosan combined MAP packaging on respiratory rate, disease prevention and colo...
    VI (30 words): Ảnh hưởng chitosan liều thấp kết hợp bao bì MAP đến tốc độ hô hấp, tỉ lệ nấm bệnh và màu sắc vỏ tron...

  Example 251292:
    EN (22 words): Results: In Vĩnh Kim and Ia Kor, malaria is transmitted all - year round, with one high peak from Se...
    VI (25 words): Kết quả: Tại Vĩnh Kim và Ia Kor mùa truyền bệnh sốt rét quanh năm, thường có 1 đỉ

In [4]:
def check_data_leakage(dataset):
    """Check if test examples appear in train"""
    print("\n" + "="*60)
    print("CHECKING DATA LEAKAGE")
    print("="*60)
    
    train_en = set(ex['en'] for ex in dataset['train'])
    test_en = set(ex['en'] for ex in dataset['test'])
    
    overlap = train_en & test_en
    
    print(f"Train unique EN sentences: {len(train_en)}")
    print(f"Test unique EN sentences: {len(test_en)}")
    print(f"Overlap: {len(overlap)}")
    
    if overlap:
        print(f"⚠️  WARNING: {len(overlap)} test sentences also in train!")
        print("Sample overlapping sentences:")
        for sent in list(overlap)[:3]:
            print(f"  - {sent[:100]}...")
    else:
        print("✅ No data leakage detected")

check_data_leakage(dataset)


CHECKING DATA LEAKAGE
Train unique EN sentences: 304773
Test unique EN sentences: 2793
Overlap: 1646
Sample overlapping sentences:
  - Common clinical features: fatigue (99.4%); anorexia, poor appetite (97.1%); collateral circulation (...
  - Subject and method: A cross-sectional study on 600 inpatients at Hue University of Medicine and Phar...
  - Results: 42.6% of mothers had knowledge that the route of transmission was from eating or drinking c...


In [5]:
def remove_train_overlaps(dataset):
    print("\n" + "="*60)
    print("REMOVING TRAIN ITEMS THAT ALSO APPEAR IN TEST (HF DATASET)")
    print("="*60)

    # Extract English sentences from test split
    test_en = set(dataset['test']['en'])   # fast and HF compatible

    old_size = len(dataset['train'])

    # Use .filter() to keep only examples not in test
    dataset['train'] = dataset['train'].filter(
        lambda ex: ex['en'] not in test_en
    )

    new_size = len(dataset['train'])
    removed = old_size - new_size

    print(f"Original train size: {old_size}")
    print(f"New train size     : {new_size}")
    print(f"Removed from train : {removed}")

    if removed > 0:
        print("✅ Train cleaned and test set unchanged.")
    else:
        print("No overlaps found.")

    return dataset
dataset = remove_train_overlaps(dataset)

# Reassemble final dataset
dataset = DatasetDict({
    'train': dataset['train'],
    'validation': train_val_split['test'],  # your val set
    'test': dataset['test']
})



REMOVING TRAIN ITEMS THAT ALSO APPEAR IN TEST (HF DATASET)


Filter:   0%|          | 0/305942 [00:00<?, ? examples/s]

Original train size: 305942
New train size     : 304286
Removed from train : 1656
✅ Train cleaned and test set unchanged.


In [6]:
from tokenizers import Tokenizer, models, trainers, pre_tokenizers, normalizers, processors
from transformers import PreTrainedTokenizerFast
from datasets import DatasetDict
from tokenizers import decoders

def train_medical_tokenizer(
    dataset: DatasetDict,
    vocab_size: int = 32000,
    save_path: str = "tokenizer_medical_envi"
):
    """
    Train a robust byte-level BPE tokenizer optimized for EN-VI medical translation.
    """

    print("=" * 70)
    print("        TRAINING EN–VI MEDICAL BYTE-LEVEL TOKENIZER")
    print("=" * 70)

    # --------------------------------------------------------
    # 1. Build corpus generator
    # --------------------------------------------------------
    print("\n1. Preparing training corpus...")

    train_iter = dataset["train"].to_iterable_dataset()

    def corpus_generator():
        for ex in train_iter:
            if "en" in ex and ex["en"]:
                yield ex["en"]
            if "vi" in ex and ex["vi"]:
                yield ex["vi"]

    # --------------------------------------------------------
    # 2. Create tokenizer
    # --------------------------------------------------------
    print("2. Initializing byte-level BPE tokenizer...")

    tokenizer = Tokenizer(
    models.BPE(unk_token="<unk>", byte_fallback=True)
    )
    # --- normalization pipeline ---
    tokenizer.normalizer = normalizers.Sequence([
        normalizers.NFD(),       # fully decompose (Vietnamese safe)
        normalizers.Lowercase(), # lowercase everything
        normalizers.NFC(),       # recompose
        normalizers.Strip(),     # remove leading/trailing whitespace
    ])

    # --- byte-level pretokenizer ---
    tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=True)
    # Add this line to your tokenizer setup
    tokenizer.decoder = decoders.ByteLevel()

    # --------------------------------------------------------
    # 3. Trainer configuration
    # --------------------------------------------------------
    print("3. Configuring BPE trainer...")

    special_tokens = [
        "<pad>", "<unk>", "<s>", "</s>",
        "<en>", "<vi>"   # language tags
    ]

    trainer = trainers.BpeTrainer(
        vocab_size=vocab_size,
        min_frequency=2,
        special_tokens=special_tokens,
        initial_alphabet=pre_tokenizers.ByteLevel.alphabet(),
        show_progress=True,
    )
    # --------------------------------------------------------
    # 4. Training
    # --------------------------------------------------------
    print(f"4. Training tokenizer on corpus (vocab={vocab_size})...")
    tokenizer.train_from_iterator(corpus_generator(), trainer=trainer)

    # --------------------------------------------------------
    # 5. Post-processing (add <s> and </s> automatically)
    # --------------------------------------------------------
    tokenizer.post_processor = processors.TemplateProcessing(
        single="<s> $A </s>",
        pair="<s> $A </s> <s> $B </s>",
        special_tokens=[
            ("<s>", tokenizer.token_to_id("<s>")),
            ("</s>", tokenizer.token_to_id("</s>")),
        ],
    )

    # --------------------------------------------------------
    # 6. Wrap as HF tokenizer
    # --------------------------------------------------------
    wrapped = PreTrainedTokenizerFast(
        tokenizer_object=tokenizer,
        bos_token="<s>",
        eos_token="</s>",
        pad_token="<pad>",
        unk_token="<unk>",
        additional_special_tokens=["<en>", "<vi>"],
        model_max_length=512,
    )

    # --------------------------------------------------------
    # 7. Save
    # --------------------------------------------------------
    print(f"\n5. Saving tokenizer to: {save_path}/")
    wrapped.save_pretrained(save_path)

    # --------------------------------------------------------
    # 8. Test samples
    # --------------------------------------------------------
    print("\n6. Testing tokenizer with sample medical text:")

    tests = [
        "The patient has SARS-CoV-2 infection.",
        "Bệnh nhân bị viêm phổi do vi khuẩn Streptococcus pneumoniae.",
        "CT scan shows pulmonary edema and pleural effusion.",
        "Huyết áp 120/80 mmHg, SpO2 95%, nhịp tim 110 bpm.",
    ]

    for t in tests:
        print("\nText:", t)
        print("Tokens:", wrapped.tokenize(t))
        print("IDs:", wrapped.encode(t))

    print("\n" + "=" * 70)
    print("✅ Tokenizer trained successfully!")
    print(f"   Final vocab size: {wrapped.vocab_size}")
    print(f"   Saved to: {save_path}/")
    print("=" * 70)

    return wrapped
tokenizer = train_medical_tokenizer( dataset, vocab_size=32000, save_path="medical_envi_tokenizer" )

        TRAINING EN–VI MEDICAL BYTE-LEVEL TOKENIZER

1. Preparing training corpus...
2. Initializing byte-level BPE tokenizer...
3. Configuring BPE trainer...
4. Training tokenizer on corpus (vocab=32000)...




5. Saving tokenizer to: medical_envi_tokenizer/

6. Testing tokenizer with sample medical text:

Text: The patient has SARS-CoV-2 infection.
Tokens: ['the', 'Ġpatient', 'Ġhas', 'Ġsars', '-', 'cov', '-', '2', 'Ġinfection', '.']
IDs: [2, 2407, 1060, 1140, 5216, 18, 4875, 18, 23, 1063, 19, 3]

Text: Bệnh nhân bị viêm phổi do vi khuẩn Streptococcus pneumoniae.
Tokens: ['bá»ĩnh', 'ĠnhÃ¢n', 'Ġbá»ĭ', 'ĠviÃªm', 'Ġphá»ķi', 'Ġdo', 'Ġvi', 'Ġkhuáº©n', 'Ġstreptococcus', 'Ġpneumoniae', '.']
IDs: [2, 5736, 415, 793, 741, 973, 573, 470, 1186, 6793, 4084, 19, 3]

Text: CT scan shows pulmonary edema and pleural effusion.
Tokens: ['ct', 'Ġscan', 'Ġshows', 'Ġpulmonary', 'Ġedema', 'Ġand', 'Ġpleural', 'Ġeffusion', '.']
IDs: [2, 1120, 3313, 4075, 2043, 3871, 327, 4940, 5104, 19, 3]

Text: Huyết áp 12

In [7]:
text = dataset["train"][0]["vi"]
tokens = tokenizer.tokenize(text)
ids = tokenizer.convert_tokens_to_ids(tokens)

print("Text:", text)
print("Tokens:", tokens)
print("IDs:", ids)


Text: Ở trẻ sơ sinh bị hẹp van động mạch phổi nghiêm trọng, xanh tím xuất hiện do luồng thông ở tâm nhĩ từ phải sang trái thông qua lỗ bầu dục.
Tokens: ['á»Ł', 'Ġtráº»', 'ĠsÆ¡', 'Ġsinh', 'Ġbá»ĭ', 'Ġháº¹p', 'Ġvan', 'ĠÄĳá»Ļng', 'Ġmáº¡ch', 'Ġphá»ķi', 'ĠnghiÃªm', 'Ġtrá»įng', ',', 'Ġxanh', 'ĠtÃŃm', 'Ġxuáº¥t', 'Ġhiá»ĩn', 'Ġdo', 'Ġluá»ĵng', 'ĠthÃ´ng', 'Ġá»Ł', 'ĠtÃ¢m', 'ĠnhÄ©', 'Ġtá»«', 'Ġpháº£i', 'Ġsang', 'ĠtrÃ¡i', 'ĠthÃ´ng', 'Ġqua', 'Ġlá»Ĺ', 'Ġbáº§u', 'Ġdá»¥c', '.']
IDs: [399, 773, 1753, 581, 793, 2416, 2066, 684, 645, 973, 2749, 1330, 17, 5261, 5919, 1117, 624, 573, 10344, 1007, 446, 1132, 2669, 549, 1037, 3317, 2161, 1007, 1041, 3277, 11881, 2029, 19]


In [8]:
# 1. Paste the FULL list of IDs from your first message here:
ids = [
    399, 773, 1753, 581, 793, 2416, 2066, 684, 645, 973, 2749, 1330, 
    17, 5261, 5919, 1117, 624, 573, 10344, 1007, 446, 1132, 2669, 
    549, 1037, 3317, 2161, 1007, 1041, 3277, 11881, 2029, 19
]

# 2. Your original text for comparison
original_text = "Ở trẻ sơ sinh bị hẹp van động mạch phổi nghiêm trọng, xanh tím xuất hiện do luồng thông ở tâm nhĩ từ phải sang trái thông qua lỗ bầu dục."

# 3. Decode back
# Make sure 'tokenizer' is already defined and loaded!
# Now try decoding again
decoded_text = tokenizer.decode(ids)
print(decoded_text)


print("Decoded text:", decoded_text)

# 4. Check
# Note: strip() is often needed because some tokenizers add a start/end space
if decoded_text.strip() == original_text.strip():
    print("SUCCESS: The data is perfect for training.")
else:
    print("--- DIFFERENCE DETECTED ---")
    print("Original:", original_text)
    print("Decoded: ", decoded_text)

ở trẻ sơ sinh bị hẹp van động mạch phổi nghiêm trọng, xanh tím xuất hiện do luồng thông ở tâm nhĩ từ phải sang trái thông qua lỗ bầu dục.
Decoded text: ở trẻ sơ sinh bị hẹp van động mạch phổi nghiêm trọng, xanh tím xuất hiện do luồng thông ở tâm nhĩ từ phải sang trái thông qua lỗ bầu dục.
--- DIFFERENCE DETECTED ---
Original: Ở trẻ sơ sinh bị hẹp van động mạch phổi nghiêm trọng, xanh tím xuất hiện do luồng thông ở tâm nhĩ từ phải sang trái thông qua lỗ bầu dục.
Decoded:  ở trẻ sơ sinh bị hẹp van động mạch phổi nghiêm trọng, xanh tím xuất hiện do luồng thông ở tâm nhĩ từ phải sang trái thông qua lỗ bầu dục.


In [9]:
from torch.utils.data import Dataset
import torch  # You also need this for torch.tensor inside __getitem__
# ============================================================================
# DATASET CLASS
# ============================================================================
class TranslationDataset(Dataset):
    """Dataset for EN-VI medical translation"""
    def __init__(self, dataset, tokenizer, max_len=512):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.bos_id = tokenizer.bos_token_id
        self.eos_id = tokenizer.eos_token_id
        self.pad_id = tokenizer.pad_token_id
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        item = self.dataset[idx]
        
        # Tokenize
        src_ids = self.tokenizer.encode(item['en'], add_special_tokens=True, max_length=self.max_len, truncation=True)
        tgt_ids = self.tokenizer.encode(item['vi'], add_special_tokens=True, max_length=self.max_len, truncation=True)
        
        return {
            'src': torch.tensor(src_ids, dtype=torch.long),
            'tgt': torch.tensor(tgt_ids, dtype=torch.long)
        }

In [10]:
def collate_fn(batch, pad_id=0):
    """Collate function with dynamic padding"""
    src_batch = [item['src'] for item in batch]
    tgt_batch = [item['tgt'] for item in batch]
    
    # Pad sequences
    src_padded = nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=pad_id)
    tgt_padded = nn.utils.rnn.pad_sequence(tgt_batch, batch_first=True, padding_value=pad_id)
    
    return {
        'src': src_padded,
        'tgt': tgt_padded
    }



In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset, DatasetDict
from tqdm import tqdm
import numpy as np

# ============================================================================
# POSITIONAL ENCODING
# ============================================================================
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for transformer"""
    def __init__(self, d_model, max_len=512, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        # Create positional encoding matrix
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * 
                            (-math.log(10000.0) / d_model))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
        
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        """
        Args:
            x: (batch_size, seq_len, d_model)
        """
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

In [12]:
# ============================================================================
# MULTI-HEAD ATTENTION
# ============================================================================
class MultiHeadAttention(nn.Module):
    """Multi-head self-attention mechanism"""
    def __init__(self, d_model, num_heads, dropout=0.1):
        super().__init__()
        assert d_model % num_heads == 0
        
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_k = d_model // num_heads
        
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)
        
        self.dropout = nn.Dropout(dropout)
    
    def split_heads(self, x):
        """Split into multiple heads: (batch, seq_len, d_model) -> (batch, num_heads, seq_len, d_k)"""
        batch_size, seq_len, d_model = x.size()
        return x.view(batch_size, seq_len, self.num_heads, self.d_k).transpose(1, 2)
    
    def combine_heads(self, x):
        """Combine heads: (batch, num_heads, seq_len, d_k) -> (batch, seq_len, d_model)"""
        batch_size, num_heads, seq_len, d_k = x.size()
        return x.transpose(1, 2).contiguous().view(batch_size, seq_len, self.d_model)
    
    def forward(self, query, key, value, mask=None):
        """
        Args:
            query, key, value: (batch_size, seq_len, d_model)
            mask: (batch_size, 1, seq_len, seq_len) or (batch_size, 1, 1, seq_len)
        """
        batch_size = query.size(0)
        
        # Linear projections and split heads
        Q = self.split_heads(self.W_q(query))  # (batch, num_heads, seq_len, d_k)
        K = self.split_heads(self.W_k(key))
        V = self.split_heads(self.W_v(value))
        
        # Scaled dot-product attention
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        
        attn_weights = F.softmax(scores, dim=-1)
        attn_weights = self.dropout(attn_weights)
        
        attn_output = torch.matmul(attn_weights, V)  # (batch, num_heads, seq_len, d_k)
        
        # Combine heads and final linear
        attn_output = self.combine_heads(attn_output)  # (batch, seq_len, d_model)
        output = self.W_o(attn_output)
        
        return output



In [13]:
# ============================================================================
# FEED FORWARD NETWORK
# ============================================================================
class FeedForward(nn.Module):
    """Position-wise feed-forward network"""
    def __init__(self, d_model, d_ff, dropout=0.1):
        super().__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        return self.linear2(self.dropout(F.relu(self.linear1(x))))



In [14]:
# ============================================================================
# ENCODER LAYER
# ============================================================================
class EncoderLayer(nn.Module):
    """Single encoder layer with self-attention and feed-forward"""
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads, dropout)
        self.feed_forward = FeedForward(d_model, d_ff, dropout)
        
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
    
    def forward(self, x, mask):
        # Self-attention with residual connection
        attn_output = self.self_attn(x, x, x, mask)
        x = self.norm1(x + self.dropout1(attn_output))
        
        # Feed-forward with residual connection
        ff_output = self.feed_forward(x)
        x = self.norm2(x + self.dropout2(ff_output))
        
        return x



In [15]:
# ============================================================================
# DECODER LAYER
# ============================================================================
class DecoderLayer(nn.Module):
    """Single decoder layer with self-attention, cross-attention, and feed-forward"""
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads, dropout)
        self.cross_attn = MultiHeadAttention(d_model, num_heads, dropout)
        self.feed_forward = FeedForward(d_model, d_ff, dropout)
        
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
    
    def forward(self, x, encoder_output, src_mask, tgt_mask):
        # Self-attention on target
        attn_output = self.self_attn(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout1(attn_output))
        
        # Cross-attention on encoder output
        cross_attn_output = self.cross_attn(x, encoder_output, encoder_output, src_mask)
        x = self.norm2(x + self.dropout2(cross_attn_output))
        
        # Feed-forward
        ff_output = self.feed_forward(x)
        x = self.norm3(x + self.dropout3(ff_output))
        
        return x



In [16]:
# ============================================================================
# TRANSFORMER MODEL
# ============================================================================
class TransformerTranslator(nn.Module):
    """Complete Transformer model for EN-VI medical translation"""
    def __init__(
        self,
        vocab_size,
        d_model=512,
        num_heads=8,
        num_encoder_layers=6,
        num_decoder_layers=6,
        d_ff=2048,
        max_len=512,
        dropout=0.1,
        pad_idx=0
    ):
        super().__init__()
        
        self.d_model = d_model
        self.pad_idx = pad_idx
        
        # Embeddings
        self.encoder_embedding = nn.Embedding(vocab_size, d_model, padding_idx=pad_idx)
        self.decoder_embedding = nn.Embedding(vocab_size, d_model, padding_idx=pad_idx)
        
        # Positional encoding
        self.pos_encoding = PositionalEncoding(d_model, max_len, dropout)
        
        # Encoder and Decoder stacks
        self.encoder_layers = nn.ModuleList([
            EncoderLayer(d_model, num_heads, d_ff, dropout)
            for _ in range(num_encoder_layers)
        ])
        
        self.decoder_layers = nn.ModuleList([
            DecoderLayer(d_model, num_heads, d_ff, dropout)
            for _ in range(num_decoder_layers)
        ])
        
        # Output projection
        self.output_projection = nn.Linear(d_model, vocab_size)
        
        # Initialize weights
        self._init_weights()
    
    def _init_weights(self):
        """Initialize weights using Xavier uniform"""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
    
    def make_src_mask(self, src):
        """Create padding mask for source: (batch, 1, 1, src_len)"""
        src_mask = (src != self.pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask
    
    def make_tgt_mask(self, tgt):
        """Create causal mask for target: (batch, 1, tgt_len, tgt_len)"""
        batch_size, tgt_len = tgt.size()
        
        # Padding mask
        tgt_pad_mask = (tgt != self.pad_idx).unsqueeze(1).unsqueeze(2)  # (batch, 1, 1, tgt_len)
        
        # Causal mask (lower triangular)
        tgt_sub_mask = torch.tril(torch.ones((tgt_len, tgt_len), device=tgt.device)).bool()
        tgt_sub_mask = tgt_sub_mask.unsqueeze(0).unsqueeze(0)  # (1, 1, tgt_len, tgt_len)
        
        tgt_mask = tgt_pad_mask & tgt_sub_mask
        return tgt_mask
    
    def encode(self, src, src_mask):
        """Encode source sequence"""
        # Embedding + positional encoding
        x = self.encoder_embedding(src) * math.sqrt(self.d_model)
        x = self.pos_encoding(x)
        
        # Pass through encoder layers
        for layer in self.encoder_layers:
            x = layer(x, src_mask)
        
        return x
    
    def decode(self, tgt, encoder_output, src_mask, tgt_mask):
        """Decode target sequence"""
        # Embedding + positional encoding
        x = self.decoder_embedding(tgt) * math.sqrt(self.d_model)
        x = self.pos_encoding(x)
        
        # Pass through decoder layers
        for layer in self.decoder_layers:
            x = layer(x, encoder_output, src_mask, tgt_mask)
        
        return x
    
    def forward(self, src, tgt):
        """
        Args:
            src: (batch_size, src_len)
            tgt: (batch_size, tgt_len)
        Returns:
            output: (batch_size, tgt_len, vocab_size)
        """
        src_mask = self.make_src_mask(src)
        tgt_mask = self.make_tgt_mask(tgt)
        
        encoder_output = self.encode(src, src_mask)
        decoder_output = self.decode(tgt, encoder_output, src_mask, tgt_mask)
        
        output = self.output_projection(decoder_output)
        return output



In [17]:
# ============================================================================
# TRAINING FUNCTION
# ============================================================================
def train_epoch(model, dataloader, optimizer, criterion, device, grad_clip=1.0):
    """Train for one epoch"""
    model.train()
    total_loss = 0
    
    pbar = tqdm(dataloader, desc="Training")
    for batch in pbar:
        src = batch['src'].to(device)
        tgt = batch['tgt'].to(device)
        
        # Teacher forcing: use tgt[:-1] as input, predict tgt[1:]
        tgt_input = tgt[:, :-1]
        tgt_output = tgt[:, 1:]
        
        optimizer.zero_grad()
        
        # Forward pass
        output = model(src, tgt_input)  # (batch, tgt_len-1, vocab_size)
        
        # Compute loss
        output = output.reshape(-1, output.size(-1))
        tgt_output = tgt_output.reshape(-1)
        loss = criterion(output, tgt_output)
        
        # Backward pass
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        
        total_loss += loss.item()
        pbar.set_postfix({'loss': f'{loss.item():.4f}'})
    
    return total_loss / len(dataloader)

def evaluate(model, dataloader, criterion, device):
    """Evaluate the model"""
    model.eval()
    total_loss = 0
    
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            src = batch['src'].to(device)
            tgt = batch['tgt'].to(device)
            
            tgt_input = tgt[:, :-1]
            tgt_output = tgt[:, 1:]
            
            output = model(src, tgt_input)
            
            output = output.reshape(-1, output.size(-1))
            tgt_output = tgt_output.reshape(-1)
            loss = criterion(output, tgt_output)
            
            total_loss += loss.item()
    
    return total_loss / len(dataloader)

In [18]:
!pip install sacrebleu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-3.2.0 sacrebleu-2.5.1


In [19]:
def translate_sentence(model, tokenizer, sentence, device, max_len=100):
    model.eval()

    # Tokenize input
    encoded = tokenizer(
        sentence,
        return_tensors="pt",
        padding=False,
        truncation=True
    )
    src = encoded["input_ids"].to(device)

    # Decode using greedy search
    pred_ids = greedy_decode(model, src, tokenizer, max_len=max_len)[0].tolist()

    # Trim at EOS
    if tokenizer.eos_token_id in pred_ids:
        pred_ids = pred_ids[:pred_ids.index(tokenizer.eos_token_id)]

    # Convert to text
    translation = tokenizer.decode(pred_ids, skip_special_tokens=True)
    return translation


In [20]:
import random

def get_random_test_examples(dataset, n=5):
    indices = random.sample(range(len(dataset['test'])), n)
    return [dataset['test'][i]['en'] for i in indices]

# Example: Sample 5 random English sentences
medical_examples = get_random_test_examples(dataset, n=5)

print(medical_examples)


['Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.', 'Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.', 'This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.', 'Cells were treated to curcumin, ethyl acetate or 70% ethanolic extracts from turmeric rhizomes at concentrations corresponding to that of curcumin at 2.5, 5 and 10 µg / ml (NĐ1, NĐ2, NĐ3) alone or combined with 100 mM ethanol for 24h.', 'All patients underwent CABG without using cardio-pulmonary bypass support.']


In [21]:
# ============================================================================
# FULL TRAINING SCRIPT (CLEAN + FIXED BLEU + FIXED DECODE)
# ============================================================================

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import sacrebleu

from transformers import PreTrainedTokenizerFast


# ============================================================================
# Greedy Decode
# ============================================================================
def greedy_decode(model, src, tokenizer, max_len=100):
    model.eval()
    device = src.device

    sos_id = tokenizer.bos_token_id
    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id

    # Create source mask
    src_mask = model.make_src_mask(src)

    with torch.no_grad():

        # Encode source sequence
        memory = model.encode(src, src_mask)

        # Start decoder input with <sos>
        ys = torch.full(
            (src.size(0), 1),
            fill_value=sos_id,
            dtype=torch.long,
            device=device
        )

        for _ in range(max_len):

            # Create target/causal mask
            tgt_mask = model.make_tgt_mask(ys)

            # Decode
            out = model.decode(ys, memory, src_mask, tgt_mask)

            # Project to vocab & pick top token
            logits = model.output_projection(out[:, -1])  # last step
            next_word = torch.argmax(logits, dim=-1).unsqueeze(1)

            # Append
            ys = torch.cat([ys, next_word], dim=1)

            # Stop if all sentences predicted EOS
            if (next_word == eos_id).all():
                break

    return ys


# ============================================================================
# Proper BLEU Computation (Correct sacrebleu Format)
# ============================================================================

def compute_bleu(model, dataloader, tokenizer, device):
    model.eval()

    hypotheses = []
    reference_stream = []

    for batch in dataloader:

        # match collate_fn keys
        src = batch["src"].to(device)
        tgt = batch["tgt"]

        # Greedy decode predictions
        pred_ids = greedy_decode(model, src, tokenizer, max_len=100)

        for i in range(src.size(0)):

            # ----- Decode Prediction -----
            pred = pred_ids[i].tolist()
            if tokenizer.eos_token_id in pred:
                pred = pred[:pred.index(tokenizer.eos_token_id)]
            pred_text = tokenizer.decode(pred, skip_special_tokens=True)

            # ----- Decode Reference -----
            ref = tgt[i].tolist()
            if tokenizer.eos_token_id in ref:
                ref = ref[:ref.index(tokenizer.eos_token_id)]
            ref_text = tokenizer.decode(ref, skip_special_tokens=True)

            hypotheses.append(pred_text)
            reference_stream.append(ref_text)

    bleu = sacrebleu.corpus_bleu(hypotheses, [reference_stream])
    return bleu.score


# ============================================================================
# MAIN TRAINING LOOP
# ============================================================================

def train():
    import torch.optim as optim

    # Hyperparameters
    BATCH_SIZE = 32
    NUM_EPOCHS = 20
    LEARNING_RATE = 1e-4
    D_MODEL = 384
    NUM_HEADS = 6
    NUM_LAYERS = 5
    D_FF = 1536
    DROPOUT = 0.15
    MAX_LEN = 256

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load tokenizer
    tokenizer = PreTrainedTokenizerFast.from_pretrained("medical_envi_tokenizer")

    # Datasets
    train_dataset = TranslationDataset(dataset["train"], tokenizer, max_len=MAX_LEN)
    val_dataset = TranslationDataset(dataset["validation"], tokenizer, max_len=MAX_LEN)
    test_dataset = TranslationDataset(dataset["test"], tokenizer, max_len=MAX_LEN)
    

    # Dataloaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        collate_fn=lambda b: collate_fn(b, tokenizer.pad_token_id),
        num_workers=2
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=lambda b: collate_fn(b, tokenizer.pad_token_id),
        num_workers=2
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=lambda b: collate_fn(b, tokenizer.pad_token_id),
        num_workers=2
    )

    # Model
    model = TransformerTranslator(
        vocab_size=tokenizer.vocab_size,
        d_model=D_MODEL,
        num_heads=NUM_HEADS,
        num_encoder_layers=NUM_LAYERS,
        num_decoder_layers=NUM_LAYERS,
        d_ff=D_FF,
        max_len=MAX_LEN,
        dropout=DROPOUT,
        pad_idx=tokenizer.pad_token_id
    ).to(device)

    print(f"\nModel Parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Loss + Optimizer
    criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
    optimizer = optim.Adam(
        model.parameters(),
        lr=LEARNING_RATE,
        betas=(0.9, 0.98),
        eps=1e-9
    )

    # Training
    best_val_loss = float("inf")

    for epoch in range(NUM_EPOCHS):
        print("\n" + "="*60)
        print(f"Epoch {epoch + 1}/{NUM_EPOCHS}")
        print("="*60)

        train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
        val_loss = evaluate(model, val_loader, criterion, device)

        print(f"\nTrain Loss: {train_loss:.4f}")
        print(f"Val Loss  : {val_loss:.4f}")
        
        print("\nSample Translations:")
        for s in medical_examples:
            translation = translate_sentence(model, tokenizer, s, device)
            print(f"EN: {s}")
            print(f"VI: {translation}")
            print("---")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save({
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "val_loss": val_loss,
            }, "best_medical_translator.pt")

            print(f"✓ Saved best model (val_loss={val_loss:.4f})")

    print("\nTraining complete!")

    # ============================================================================
    # BLEU Evaluation
    # ============================================================================

    print("\nCalculating BLEU on validation set...")
    bleu_score = compute_bleu(model, test_loader, tokenizer, device)
    print(f"\nBLEU Score: {bleu_score:.2f}")
    print("="*60)


# Run training
if __name__ == "__main__":
    train()


Using device: cuda

Model Parameters: 57,601,280

Epoch 1/20


Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:26<00:00,  7.06it/s, loss=3.7837]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 4.5540
Val Loss  : 3.5345

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật cắt lớp vi tính của phẫu thuật nội soi cắt lớp vi tính có thể được thực hiện trong phẫu thuật cắt tuyến giáp.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: tỷ lệ bệnh nhân có kiến thức, thái độ, thái độ, thái độ, thái độ, thực hành đúng về phòng ngừa bệnh, điều trị tại bệnh viện đa khoa tỉnh thái nguyên.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: đối tượng và phương pháp nghiên cứu: nghiên cứu tiến cứu, tiến cứu, tiến cứu, tiến hành

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:27<00:00,  7.06it/s, loss=2.9054]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 3.4009
Val Loss  : 3.0531

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật cắt tuyến vú bằng vạt da cơ thắt lưng bằng vạt da cơ thắt lưng.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân suy dinh dưỡng có tỷ lệ bệnh nhân cao tuổi, bệnh viện, bệnh viện đa khoa tỉnh thái bình là rất quan trọng, cần có sự cải thiện chất lượng cuộc sống của bệnh nhân.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: đối tượng và phương pháp nghiên cứu: nghiên cứu hồi cứu, tiến cứu, tiến cứu, tiến hành trên bệnh nhân suy tim 

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:25<00:00,  7.06it/s, loss=2.7405]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 3.0378
Val Loss  : 2.7999

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật cắt bỏ tuyến vú bằng vạt da cơ thanh ở bệnh nhân ung thư vú giai đoạn sớm.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân suy dinh dưỡng có tỷ lệ suy dinh dưỡng cao, cần chú ý đến chăm sóc dinh dưỡng, chăm sóc dinh dưỡng, bệnh nhân có chất lượng cuộc sống tốt, giảm chất lượng cuộc sống.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu được thực hiện trên bệnh nhân sau phẫu thuật và sau đó được theo dõi và theo dõi sau 

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:25<00:00,  7.07it/s, loss=2.7499]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 2.8125
Val Loss  : 2.6388

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật cắt bỏ vú bằng vạt da cơ ngực dưới đòn có cuống rốn ở người trưởng thành.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân suy dinh dưỡng có tình trạng dinh dưỡng cao, có thể được điều trị kịp thời, giảm nhu cầu chăm sóc dinh dưỡng, giảm albumin máu, giảm albumin máu, giảm albumin máu.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: phương pháp nghiên cứu: nghiên cứu hồi cứu, theo dõi dọc, theo dõi dọc sau 6 tháng và 12 tháng.
---

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:28<00:00,  7.05it/s, loss=2.4391]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 2.6415
Val Loss  : 2.4273

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật tạo hình vú bằng vạt da cơ ngực dưới vú trong cắt vú lành tính.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện có tình trạng dinh dưỡng cao, có thể điều trị bảo tồn, giảm nhu cầu chăm sóc dinh dưỡng và chăm sóc sức khoẻ cho người bệnh là rất tốt.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu được tiến hành nhằm đánh giá tình trạng huyết áp và huyết áp trước và sau điều trị, sau đó là ngày thứ 2 và ngày thứ

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:27<00:00,  7.06it/s, loss=2.2410]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 2.3881
Val Loss  : 2.0851

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả điều trị phẫu thuật ung thư vú có cuống glisson dưới đòn có cuống glisson dưới da.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện điều trị suy dinh dưỡng thể nặng, cải thiện tình trạng dinh dưỡng, giúp cải thiện tình trạng dinh dưỡng, chăm sóc sức khoẻ, chăm sóc sức khoẻ và chăm sóc sức khoẻ của người bệnh là 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu, mô tả, huyết áp được thực hiện trước và sau gh

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:28<00:00,  7.05it/s, loss=2.1606]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 2.0979
Val Loss  : 1.8413

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật tái tạo vú bằng vạt nhánh xuyên cuống vú trong điều trị ung thư vú có sẹo mổ thấp.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện điều trị xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng dinh dưỡng, chăm sóc dinh dưỡng hợp lý, chăm sóc sức khoẻ hợp lý chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu, tiến cứu, mô tả, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày l

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:26<00:00,  7.06it/s, loss=1.3924]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.9082
Val Loss  : 1.7130

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật tái tạo vú bằng vạt da cuống mạch vú hai cuống liền tại bệnh viện ung bướu nghệ an.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện điều trị xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng sức khoẻ, chăm sóc dinh dưỡng hợp lý chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục.
---
EN: Cells were trea

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:26<00:00,  7.06it/s, loss=1.3944]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.7768
Val Loss  : 1.6282

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật tái tạo vú bằng vạt da nhánh xuyên cuống liền ở bệnh nhân ung thư vú có sẹo mổ thấp.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng sức khoẻ thể chất, chăm sóc sức khoẻ hợp lý chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: đây là nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục.


Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:28<00:00,  7.05it/s, loss=1.5062]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.6791
Val Loss  : 1.5591

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tái tạo vú bằng vạt da nhánh xuyên cuống mạch liền trong điều trị ung thư vú có sẹo bụng thấp.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, điều kiện chăm sóc chung đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu, tiến cứu, mô tả, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục.
---
EN:

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:29<00:00,  7.05it/s, loss=1.9383]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.6039
Val Loss  : 1.5181

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tạo hình vú bằng vạt da nhánh xuyên hai cuống mạch trong điều trị ung thư vú có sẹo bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: người bệnh nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, chăm sóc chung đúng cách chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu, tiến cứu, mô tả, huyết áp được đo hàng ngày trước và sau ghép, 10 ngày liên tục.
---
EN: Cells 

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:27<00:00,  7.05it/s, loss=1.4942]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.5439
Val Loss  : 1.4826

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tạo hình vú bằng vạt da cuống liền cuống mạch vú trong điều trị ung thư vú có sẹo bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: người bệnh nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể lực, và các hoạt động chăm sóc chung đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục.
---
EN:

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:33<00:00,  7.02it/s, loss=1.5046]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.4938
Val Loss  : 1.4533

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tái tạo vú bằng vạt da nhánh xuyên động mạch vú cùng bên ở bệnh nhân ung thư vú có sẹo bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, và các hoạt động chăm sóc chung đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: đây là nghiên cứu hồi cứu, tiến cứu, mô tả, tiến cứu, theo dõi dọc, huyết áp được đo hàng ngày trước 

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:31<00:00,  7.04it/s, loss=1.5557]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.4529
Val Loss  : 1.4270

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tái tạo vú bằng vạt da cuống liền hai bên ở bệnh nhân ung thư vú có sẹo bụng thấp.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, và các hoạt động chăm sóc sức khoẻ tổng quát chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục.
---
EN: C

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:31<00:00,  7.04it/s, loss=1.5042]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.4184
Val Loss  : 1.4201

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tạo hình vú bằng vạt cuống liền hai bên ở bệnh nhân ung thư vú có sẹo vùng bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: người bệnh vào viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, và hoạt động chăm sóc chung đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: đây là nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép sớm, và 10 ngày liên tục.
---
EN:

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:26<00:00,  7.06it/s, loss=1.5259]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.3881
Val Loss  : 1.4037

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tạo hình vú bằng vạt da cuống liền hai bên ở bệnh nhân ung thư vú có sẹo vùng bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất của họ, và chăm sóc tổng quát đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục.
---
EN: Cells 

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:25<00:00,  7.07it/s, loss=1.5475]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.3615
Val Loss  : 1.3883

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả phẫu thuật tái tạo vú bằng vạt da nhánh xuyên động mạch ngực trong điều trị ung thư vú có sẹo mổ vùng bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: người bệnh nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, và chăm sóc chung đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục.


Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:33<00:00,  7.03it/s, loss=1.2364]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.3376
Val Loss  : 1.3658

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tạo hình vú bằng vạt da nhánh xuyên động mạch vú trong điều trị ung thư vú có sẹo mổ vùng bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: người bệnh nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, và hoạt động chăm sóc tổng quát đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên tục

Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:31<00:00,  7.03it/s, loss=1.3408]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.3162
Val Loss  : 1.3590

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tạo hình vú bằng vạt cuống mạch xuyên cuống cùng bên ở bệnh nhân ung thư vú có sẹo mổ vùng bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: người bệnh vào viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất, và hoạt động chăm sóc chung đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: đây là nghiên cứu hồi cứu và tiến cứu, đo huyết áp hàng ngày trước và sau ghép, và 10 ngày liên tục.


Training:   0%|          | 0/9509 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Training: 100%|██████████| 9509/9509 [22:34<00:00,  7.02it/s, loss=1.2705]
Evaluating:   0%|          | 0/504 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PA


Train Loss: 1.2972
Val Loss  : 1.3534

Sample Translations:
EN: Objectives: To assessement outcome of breast reconstruction with the ispilateral pedicled TRAM flap in breast cancer patient having the low abdominal scars.
VI: mục tiêu: đánh giá kết quả tái tạo vú bằng vạt nhánh xuyên động mạch ngực trong điều trị ung thư vú có sẹo mổ vùng bụng dưới.
---
EN: Conclusion: Patients admitted to the hospital in a state of high decompensated cirrhosis, nutritional care helps to improve their physical condition, and correct general care activities account for 89.93%.
VI: kết luận: bệnh nhân nhập viện trong tình trạng xơ gan mất bù, chăm sóc dinh dưỡng giúp cải thiện tình trạng thể chất của họ, và các hoạt động chăm sóc chung đúng chiếm 89,93%.
---
EN: This was a retrospective and prospective study, blood pressure was measured daily before and early post - transplantation, and 10 days continuously.
VI: nghiên cứu hồi cứu và tiến cứu, huyết áp được đo hàng ngày trước và sau ghép, và 10 ngày liên

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



BLEU Score: 45.69
