In [None]:
%pip install mysql-connector-python

In [None]:
import os
import json
import logging
import pickle
import psutil
import threading
import torch
from datetime import datetime
from typing import List, Dict, Generator, Optional
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor, as_completed
import mysql.connector
from mysql.connector import pooling
import tiktoken
import time
import hashlib
from pathlib import Path
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, 
    pipeline, BitsAndBytesConfig
)
import gc

In [None]:
import json
import re
import logging
from typing import List, Dict, Generator
import mysql.connector
from mysql.connector import pooling
from pathlib import Path
from collections import defaultdict
import transformers

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger(__name__)

class Config:
    # Database config
    DB_CONFIG = {
        'host': 'localhost',
        'user': 'root', 
        'password': 'baohuy2501',
        'database': 'law_service',
        'port': 3307,
        'pool_size': 5
    }
    
    # Processing config
    MAX_SEQ_LENGTH = 256
    MIN_TEXT_LENGTH = 50
    OUTPUT_DIR = "phobert_training_data"

class VietnameseLegalProcessor:
    """Xử lý văn bản pháp luật tiếng Việt"""
    
    def __init__(self):
        self.legal_categories = {
            'hình_sự': ['tội phạm', 'hình phạt', 'án tù'],
            'dân_sự': ['hợp đồng', 'bồi thường', 'hôn nhân'],
            'hành_chính': ['giấy phép', 'thủ tục', 'công chức'],
            'thương_mại': ['doanh nghiệp', 'kinh doanh', 'đầu tư'],
            'lao_động': ['lao động', 'tiền lương', 'bảo hiểm']
        }
    
    def clean_text(self, text: str) -> str:
        """Làm sạch văn bản"""
        if not text or len(text.strip()) < 10:
            return ""
        
        # Xóa khoảng trắng thừa
        text = re.sub(r'\s+', ' ', text.strip())
        
        # Chuẩn hóa cấu trúc
        text = re.sub(r'([.!?])\s*([A-ZÀÁÂÃÈÉÊÌÍÎÏÒÓÔÕÙÚÛÜÝĐ])', r'\1 \2', text)
        
        return text
    
    def classify_content(self, text: str) -> str:
        """Phân loại nội dung pháp luật"""
        text_lower = text.lower()
        scores = defaultdict(int)
        
        for category, keywords in self.legal_categories.items():
            for keyword in keywords:
                scores[category] += text_lower.count(keyword)
        
        return max(scores.items(), key=lambda x: x[1])[0] if scores else "general"
    
    def chunk_text(self, text: str, max_length: int = 256) -> List[str]:
        """Chia nhỏ văn bản"""
        sentences = re.split(r'(?<=[.!?])\s+', text)
        chunks = []
        current_chunk = ""
        
        for sentence in sentences:
            test_chunk = f"{current_chunk} {sentence}".strip()
            
            if len(test_chunk) <= max_length:
                current_chunk = test_chunk
            else:
                if current_chunk:
                    chunks.append(current_chunk)
                current_chunk = sentence[:max_length]
        
        if current_chunk:
            chunks.append(current_chunk)
        
        return [chunk for chunk in chunks if len(chunk) >= Config.MIN_TEXT_LENGTH]

class PhoBERTDataGenerator:
    """Tạo dữ liệu huấn luyện cho PhoBERT"""
    
    def __init__(self):
        self.processor = VietnameseLegalProcessor()
        self.qa_templates = [
            ("Nội dung chính của quy định này là gì?", "main_content"),
            ("Quy định này áp dụng cho ai?", "subject"),
            ("Điều kiện thực hiện là gì?", "conditions"),
            ("Thủ tục cần thiết là gì?", "procedures")
        ]
    
    def generate_qa_pairs(self, text: str, category: str) -> List[Dict]:
        """Tạo cặp hỏi-đáp với answer được extract từ context"""
        qa_pairs = []
        chunks = self.processor.chunk_text(text)
        
        for chunk in chunks:  # Xử lý tất cả chunks
            for question, q_type in self.qa_templates:
                answer = self._extract_answer_from_context(question, chunk, q_type)
                if answer and len(answer.strip()) > 10:  # Đảm bảo answer có nội dung
                    qa_pairs.append({
                        "question": question,
                        "context": chunk,
                        "answer": answer,
                        "category": category
                    })
        
        return qa_pairs
    
    def _extract_answer_from_context(self, question: str, context: str, q_type: str) -> str:
        """Extract answer từ context dựa trên question và context"""
        sentences = re.split(r'(?<=[.!?])\s+', context)
        context_lower = context.lower()
        
        if q_type == "main_content":
            # Lấy 2-3 câu đầu làm nội dung chính
            main_sentences = sentences[:2]
            return '. '.join(main_sentences).strip() + '.' if main_sentences else ""
        
        elif q_type == "subject":
            # Tìm câu có từ khóa về đối tượng áp dụng
            subject_keywords = ['áp dụng', 'đối tượng', 'phạm vi', 'người', 'tổ chức', 'cá nhân']
            for sentence in sentences:
                if any(keyword in sentence.lower() for keyword in subject_keywords):
                    return sentence.strip()
            # Fallback: lấy câu đầu tiên
            return sentences[0].strip() if sentences else ""
        
        elif q_type == "conditions":
            # Tìm câu có từ khóa về điều kiện
            condition_keywords = ['điều kiện', 'yêu cầu', 'tiêu chuẩn', 'phải', 'cần', 'quy định']
            for sentence in sentences:
                if any(keyword in sentence.lower() for keyword in condition_keywords):
                    return sentence.strip()
            # Fallback: tìm câu có số liệu hoặc danh sách
            for sentence in sentences:
                if any(char in sentence for char in [':', ';', '1.', '2.', 'a)', 'b)']):
                    return sentence.strip()
            return sentences[0].strip() if sentences else ""
        
        elif q_type == "procedures":
            # Tìm câu có từ khóa về thủ tục
            procedure_keywords = ['thủ tục', 'quy trình', 'trình tự', 'bước', 'giai đoạn', 'nộp', 'đăng ký']
            for sentence in sentences:
                if any(keyword in sentence.lower() for keyword in procedure_keywords):
                    return sentence.strip()
            # Fallback: tìm câu có động từ hành động
            action_words = ['thực hiện', 'tiến hành', 'làm', 'nộp', 'gửi', 'đăng ký']
            for sentence in sentences:
                if any(word in sentence.lower() for word in action_words):
                    return sentence.strip()
            return sentences[0].strip() if sentences else ""
        
        # Default: trả về câu đầu tiên
        return sentences[0].strip() if sentences else ""
    
    def generate_classification_data(self, text: str) -> List[Dict]:
        """Tạo dữ liệu phân loại"""
        chunks = self.processor.chunk_text(text)
        classification_data = []
        
        for chunk in chunks:
            category = self.processor.classify_content(chunk)
            classification_data.append({
                "text": chunk,
                "label": category
            })
        
        return classification_data

class DatabaseManager:
    """Quản lý kết nối database"""
    
    def __init__(self):
        self.pool = pooling.MySQLConnectionPool(
            pool_name="legal_pool",
            pool_size=Config.DB_CONFIG['pool_size'],
            **{k: v for k, v in Config.DB_CONFIG.items() if k != 'pool_size'}
        )
    
    def get_documents(self, limit: int = 1000) -> Generator[Dict, None, None]:
        """Lấy documents từ database"""
        conn = self.pool.get_connection()
        try:
            cursor = conn.cursor(dictionary=True)
            cursor.execute("""
                SELECT 
                    pdtopic.name AS topic_name,
                    pdsubject.name AS subject_name,
                    pdarticle.id AS article_id,
                    pdarticle.content AS content
                FROM pdtopic
                JOIN pdsubject ON pdsubject.id_topic = pdtopic.id
                JOIN pdchapter ON pdchapter.id_subject = pdsubject.id  
                JOIN pdarticle ON pdarticle.id_chapter = pdchapter.id
                WHERE pdarticle.content IS NOT NULL
                LIMIT %s
            """, (limit,))
            
            for row in cursor:
                yield {
                    'id': row['article_id'],
                    'content': row['content'],
                    'category': row['topic_name']
                }
        finally:
            conn.close()

class LegalDatasetGenerator:
    """Main class tạo dataset"""
    
    def __init__(self):
        self.db_manager = DatabaseManager()
        self.data_generator = PhoBERTDataGenerator()
        
        # Tạo thư mục output
        Path(Config.OUTPUT_DIR).mkdir(exist_ok=True)
        
        self.stats = {
            'documents_processed': 0,
            'qa_pairs_generated': 0,
            'classification_samples': 0
        }
    
    def process_document(self, doc: Dict) -> Dict:
        """Xử lý một document"""
        content = doc['content']
        category = doc['category']
        
        if len(content) < Config.MIN_TEXT_LENGTH:
            return {}
        
        # Tạo dữ liệu QA
        qa_pairs = self.data_generator.generate_qa_pairs(content, category)
        
        # Tạo dữ liệu phân loại
        classification_data = self.data_generator.generate_classification_data(content)
        
        self.stats['qa_pairs_generated'] += len(qa_pairs)
        self.stats['classification_samples'] += len(classification_data)
        
        return {
            'qa_pairs': qa_pairs,
            'classification': classification_data
        }
    
    def save_data(self, all_data: Dict[str, List]):
        """Lưu dữ liệu"""
        for task_type, data in all_data.items():
            if data:
                filename = f"{Config.OUTPUT_DIR}/{task_type}.jsonl"
                with open(filename, 'w', encoding='utf-8') as f:
                    for item in data:
                        f.write(json.dumps(item, ensure_ascii=False) + '\n')
                logger.info(f"Saved {len(data)} {task_type} samples")
    
    def run(self, limit: int = 11640):
        """Chạy quá trình tạo dataset"""
        logger.info(f"Starting dataset generation for {limit} records")
        
        all_data = {
            'qa_pairs': [],
            'classification': []
        }
        
        batch_size = 500  # Xử lý theo batch để tối ưu memory
        processed_count = 0
        
        for doc in self.db_manager.get_documents(limit):
            processed_data = self.process_document(doc)
            if processed_data:
                for task_type, data in processed_data.items():
                    all_data[task_type].extend(data)
                
                self.stats['documents_processed'] += 1
                processed_count += 1
                
                # Log progress mỗi 500 documents
                if processed_count % 500 == 0:
                    logger.info(f"Processed {processed_count}/{limit} documents")
                    logger.info(f"Generated: {len(all_data['qa_pairs'])} QA pairs, {len(all_data['classification'])} classification samples")
                
                # Lưu dữ liệu theo batch để tránh memory overflow
                if processed_count % 2000 == 0:
                    self.save_batch_data(all_data, processed_count // 2000)
                    # Reset data để tiết kiệm memory
                    all_data = {'qa_pairs': [], 'classification': []}
        
        # Lưu batch cuối cùng
        if all_data['qa_pairs'] or all_data['classification']:
            self.save_batch_data(all_data, (processed_count // 2000) + 1)
        
        # Merge tất cả batch files
        self.merge_batch_files()
        
        # Log thống kê cuối
        logger.info(f"""
        COMPLETED:
        - Documents processed: {self.stats['documents_processed']}
        - QA pairs generated: {self.stats['qa_pairs_generated']}
        - Classification samples: {self.stats['classification_samples']}
        - Output directory: {Config.OUTPUT_DIR}
        """)
    
    def save_batch_data(self, data: Dict[str, List], batch_num: int):
        """Lưu dữ liệu theo batch"""
        for task_type, items in data.items():
            if items:
                filename = f"{Config.OUTPUT_DIR}/{task_type}_batch_{batch_num}.jsonl"
                with open(filename, 'w', encoding='utf-8') as f:
                    for item in items:
                        f.write(json.dumps(item, ensure_ascii=False) + '\n')
                logger.info(f"Saved batch {batch_num}: {len(items)} {task_type} samples")
    
    def merge_batch_files(self):
        """Merge tất cả batch files thành file cuối cùng"""
        import glob
        
        for task_type in ['qa_pairs', 'classification']:
            batch_files = glob.glob(f"{Config.OUTPUT_DIR}/{task_type}_batch_*.jsonl")
            if batch_files:
                final_file = f"{Config.OUTPUT_DIR}/{task_type}.jsonl"
                with open(final_file, 'w', encoding='utf-8') as outfile:
                    for batch_file in sorted(batch_files):
                        with open(batch_file, 'r', encoding='utf-8') as infile:
                            outfile.write(infile.read())
                        # Xóa batch file sau khi merge
                        Path(batch_file).unlink()
                logger.info(f"Merged {len(batch_files)} batch files into {final_file}")

if __name__ == "__main__":
    generator = LegalDatasetGenerator()
    generator.run(limit=11640)  # Xử lý tất cả 11640 records

In [5]:
import os
import json
import torch
import logging
import transformers
import sys
import numpy as np
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Optional
from dataclasses import dataclass
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from transformers import (
    AutoTokenizer, AutoModelForQuestionAnswering,
    TrainingArguments, Trainer, DataCollatorWithPadding,
    EarlyStoppingCallback
)
from datasets import Dataset, DatasetDict, load_dataset
from huggingface_hub import HfApi, login, create_repo, upload_folder
import wandb
import importlib.util
from tqdm import tqdm

print(f"Transformers version: {transformers.__version__}")
print(f"Transformers location: {importlib.util.find_spec('transformers').origin}")

# Setup logging with rotation
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.handlers.RotatingFileHandler(
            'phobert_finetuning.log',
            maxBytes=10*1024*1024,
            backupCount=5,
            encoding='utf-8'
        ),
        logging.StreamHandler()
    ]
)

try:
    sys.stdout.reconfigure(encoding='utf-8')
    sys.stderr.reconfigure(encoding='utf-8')
except Exception:
    pass

logger = logging.getLogger(__name__)

@dataclass
class FineTuningConfig:
    # Model settings
    model_name: str = "vinai/phobert-base"
    model_output_dir: str = "phobert-legal-qa-finetuned"
    
    # Data settings
    data_file: str = "phobert_training_data/qa_pairs.jsonl"
    max_length: int = 256
    doc_stride: int = 128
    max_query_length: int = 64
    max_answer_length: int = 256
    
    # Training settings
    learning_rate: float = 2e-5
    num_train_epochs: int = 3
    per_device_train_batch_size: int = 4
    per_device_eval_batch_size: int = 8
    gradient_accumulation_steps: int = 4
    warmup_ratio: float = 0.1
    weight_decay: float = 0.01
    fp16: bool = True
    dataloader_num_workers: int = 4
    
    # Evaluation settings
    evaluation_strategy: str = "steps"
    eval_steps: int = 200
    save_steps: int = 200
    save_total_limit: int = 3
    load_best_model_at_end: bool = True
    metric_for_best_model: str = "eval_f1"
    greater_is_better: bool = True
    
    # Early stopping
    early_stopping_patience: int = 3
    early_stopping_threshold: float = 0.001
    
    # Logging
    logging_steps: int = 50
    report_to: Optional[str] = None  # Set to "wandb" for Weights & Biases
    run_name: str = f"phobert-legal-qa-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    
    # Wandb settings
    wandb_api_key: Optional[str] = None
    wandb_project: str = "phobert-legal-qa"
    
    # Validation split
    test_size: float = 0.15
    random_state: int = 42
    
    # Hugging Face Hub settings
    push_to_hub: bool = False
    hf_token: Optional[str] = None
    hf_repo_name: str = "phobert-legal-qa-v2"
    hf_username: Optional[str] = None
    hub_model_id: Optional[str] = None
    hub_private_repo: bool = False
    commit_message: str = "Fine-tuned PhoBERT for Vietnamese Legal QA - Updated Dataset"

class LegalQADataProcessor:
    def __init__(self, config: FineTuningConfig):
        self.config = config
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
    
    def load_qa_data(self) -> List[Dict]:
        """Load QA data from JSONL file using streaming."""
        data_file = Path(self.config.data_file)
        if not data_file.exists():
            logger.error(f"Data file not found: {data_file}")
            return []
        
        try:
            dataset = load_dataset('json', data_files=str(data_file), split='train')
            all_qa_pairs = [dict(item) for item in tqdm(dataset, desc="Loading QA pairs")]
            logger.info(f"Loaded {len(all_qa_pairs)} QA pairs from {data_file}")
            return all_qa_pairs
        except Exception as e:
            logger.error(f"Error loading {data_file}: {e}")
            return []
    
    def preprocess_qa_data(self, qa_pairs: List[Dict]) -> List[Dict]:
        """Preprocess QA pairs for PhoBERT training."""
        processed_data = []
        skipped_count = 0
        required_fields = {"context", "question", "answer"}
        
        for idx, qa_pair in enumerate(tqdm(qa_pairs, desc="Preprocessing QA pairs")):
            try:
                if not all(field in qa_pair for field in required_fields):
                    logger.debug(f"Skipping QA pair {idx}: missing required fields")
                    skipped_count += 1
                    continue
                
                context = qa_pair["context"].strip()
                question = qa_pair["question"].strip()
                answer = qa_pair["answer"].strip()
                category = qa_pair.get("category", "Unknown")
                
                if not context or not question or not answer:
                    logger.debug(f"Skipping QA pair {idx}: empty required fields")
                    skipped_count += 1
                    continue
                
                answer_start = context.find(answer)
                if answer_start == -1:
                    answer_start = context.lower().find(answer.lower())
                    if answer_start != -1:
                        answer = context[answer_start:answer_start + len(answer)]
                    else:
                        logger.debug(f"Skipping QA pair {idx}: answer '{answer[:50]}...' not found in context")
                        skipped_count += 1
                        continue
                
                if answer_start + len(answer) > len(context):
                    logger.debug(f"Skipping QA pair {idx}: invalid answer span")
                    skipped_count += 1
                    continue
                
                processed_data.append({
                    "id": f"qa_{idx}",
                    "context": context,
                    "question": question,
                    "answer": {
                        "text": [answer],
                        "answer_start": [answer_start]
                    },
                    "category": category
                })
            except Exception as e:
                logger.warning(f"Error processing QA pair {idx}: {e}")
                skipped_count += 1
                continue
        
        logger.info(f"Processed {len(processed_data)} valid QA pairs (skipped {skipped_count})")
        return processed_data
    
    def tokenize_function(self, examples):
        """Tokenize examples for PhoBERT."""
        input_ids = []
        attention_mask = []
        start_positions = []
        end_positions = []

        for i in range(len(examples["question"])):
            question = examples["question"][i]
            context = examples["context"][i]
            answer = examples["answer"][i]

            # Xử lý trường hợp answer là string hoặc dict
            if isinstance(answer, dict):
                answer_text = answer.get("text", [""])[0]
                answer_start = answer.get("answer_start", [0])[0]
            elif isinstance(answer, str):
                answer_text = answer
                answer_start = context.find(answer_text)
                if answer_start == -1:
                    answer_start = 0
            else:
                answer_text = ""
                answer_start = 0

            # Tokenize question và context riêng biệt để xác định số token
            question_tokens = self.tokenizer.tokenize(question)
            context_tokens = self.tokenizer.tokenize(context)
            answer_tokens = self.tokenizer.tokenize(answer_text)

            # Tổng số token question + 2 ([CLS] và [SEP])
            context_start = len(question_tokens) + 2

            # Tìm vị trí answer trong context tokens
            def find_sublist(lst, sublst):
                for idx in range(len(lst) - len(sublst) + 1):
                    if lst[idx:idx+len(sublst)] == sublst:
                        return idx
                return -1

            answer_token_start = find_sublist(context_tokens, answer_tokens)
            if answer_token_start == -1:
                start_positions.append(0)
                end_positions.append(0)
            else:
                start_positions.append(context_start + answer_token_start)
                end_positions.append(context_start + answer_token_start + len(answer_tokens) - 1)

            # Tokenize full input
            encoded = self.tokenizer(
                question,
                context,
                truncation="only_second",
                max_length=self.config.max_length,
                padding="max_length",
                return_tensors="pt"
            )

            input_ids.append(encoded["input_ids"][0])
            attention_mask.append(encoded["attention_mask"][0])

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "start_positions": start_positions,
            "end_positions": end_positions
        }
    
    def prepare_datasets(self, qa_data: List[Dict]) -> DatasetDict:
        """Prepare training and validation datasets."""
        categories = [item["category"] for item in qa_data]
        
        try:
            train_data, val_data = train_test_split(
                qa_data,
                test_size=self.config.test_size,
                random_state=self.config.random_state,
                stratify=categories
            )
        except ValueError:
            logger.warning("Stratified split failed, using random split")
            train_data, val_data = train_test_split(
                qa_data,
                test_size=self.config.test_size,
                random_state=self.config.random_state
            )
        
        logger.info(f"Train size: {len(train_data)}, Validation size: {len(val_data)}")
        train_categories = [item["category"] for item in train_data]
        val_categories = [item["category"] for item in val_data]
        logger.info("Training set categories: " + 
                   ", ".join([f"{cat}: {train_categories.count(cat)}" for cat in set(train_categories)]))
        logger.info("Validation set categories: " + 
                   ", ".join([f"{cat}: {val_categories.count(cat)}" for cat in set(val_categories)]))
        
        train_dataset = Dataset.from_list(train_data)
        val_dataset = Dataset.from_list(val_data)
        
        logger.info("Tokenizing datasets...")
        columns_to_remove = ["context", "question", "answer", "category"]
        train_dataset = train_dataset.map(
            self.tokenize_function,
            batched=True,
            remove_columns=columns_to_remove,
            desc="Tokenizing train dataset"
        )
        val_dataset = val_dataset.map(
            self.tokenize_function,
            batched=True,
            remove_columns=columns_to_remove,
            desc="Tokenizing validation dataset"
        )
        
        return DatasetDict({"train": train_dataset, "validation": val_dataset})

class HuggingFaceUploader:
    def __init__(self, config: FineTuningConfig):
        self.config = config
        self.api = HfApi()
    
    def login_to_hub(self) -> bool:
        """Login to Hugging Face Hub."""
        if self.config.hf_token:
            logger.info("Logging in to Hugging Face Hub...")
            login(token=self.config.hf_token)
            return True
        try:
            user_info = self.api.whoami()
            logger.info(f"Already logged in as: {user_info['name']}")
            return True
        except Exception:
            logger.warning("Not logged in to Hugging Face Hub. Please run 'huggingface-cli login' first")
            return False
    
    def create_model_card(self, training_info: Dict) -> str:
        """Create a model card for the repository."""
        return f"""---
language: vi
tags:
- phobert
- question-answering
- vietnamese
- legal-qa
- pytorch
- transformers
license: apache-2.0
datasets:
- custom-legal-qa
metrics:
- f1
- accuracy
model-index:
- name: {self.config.hf_repo_name}
  results:
  - task:
      type: question-answering
      name: Question Answering
    metrics:
    - type: f1
      value: {training_info.get('eval_result', {}).get('eval_f1', 'N/A')}
      name: F1 Score
    - type: accuracy
      value: {training_info.get('eval_result', {}).get('eval_accuracy', 'N/A')}
      name: Accuracy
---

# PhoBERT Fine-tuned for Vietnamese Legal QA

## Model Description

This model is a fine-tuned version of [vinai/phobert-base](https://huggingface.co/vinai/phobert-base) for Vietnamese legal question answering.

## Training Details

### Training Data
- **Dataset**: Custom Vietnamese Legal QA dataset
- **Total QA pairs**: {training_info.get('dataset_info', {}).get('total_qa_pairs', 'N/A')}
- **Training samples**: {training_info.get('dataset_info', {}).get('train_size', 'N/A')}
- **Validation samples**: {training_info.get('dataset_info', {}).get('validation_size', 'N/A')}
- **Categories**: {', '.join(training_info.get('dataset_info', {}).get('categories', []))}

### Training Configuration
- **Base model**: {self.config.model_name}
- **Learning rate**: {self.config.learning_rate}
- **Training epochs**: {self.config.num_train_epochs}
- **Batch size**: {self.config.per_device_train_batch_size}
- **Max sequence length**: {self.config.max_length}

### Training Results
- **Training Loss**: {training_info.get('train_result', {}).get('training_loss', 'N/A')}
- **Validation F1**: {training_info.get('eval_result', {}).get('eval_f1', 'N/A')}
- **Validation Accuracy**: {training_info.get('eval_result', {}).get('eval_accuracy', 'N/A')}

## Usage

```python
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch

tokenizer = AutoTokenizer.from_pretrained("{self.config.hub_model_id or self.config.hf_repo_name}")
model = AutoModelForQuestionAnswering.from_pretrained("{self.config.hub_model_id or self.config.hf_repo_name}")

question = "Quy định này áp dụng cho ai?"
context = "Thanh niên là công dân Việt Nam từ đủ 16 tuổi đến 30 tuổi."

inputs = tokenizer(question, context, return_tensors="pt", max_length=512, truncation=True)
with torch.no_grad():
    outputs = model(**inputs)

start_idx = torch.argmax(outputs.start_logits)
end_idx = torch.argmax(outputs.end_logits)
answer = tokenizer.decode(inputs["input_ids"][0][start_idx:end_idx+1])
print(f"Answer: {{answer}}")
```

## Categories

{chr(10).join([f"- {cat}" for cat in training_info.get('dataset_info', {}).get('categories', [])])}

## Limitations

This model is trained on Vietnamese legal documents and may not generalize to other domains or languages.

## Training Framework

- Framework: Transformers {training_info.get('transformers_version', '4.x')}
- Language: Vietnamese
- License: Apache 2.0
"""
    
    def create_repository(self) -> Optional[str]:
        """Create repository on Hugging Face Hub."""
        try:
            repo_id = self.config.hub_model_id or f"{self.config.hf_username}/{self.config.hf_repo_name}"
            logger.info(f"Creating repository: {repo_id}")
            create_repo(
                repo_id=repo_id,
                private=self.config.hub_private_repo,
                exist_ok=True
            )
            logger.info(f"Repository created/verified: {repo_id}")
            return repo_id
        except Exception as e:
            logger.error(f"Failed to create repository: {e}")
            return None
    
    def upload_model(self, model_path: str, training_info: Dict) -> bool:
        """Upload model to Hugging Face Hub."""
        if not self.login_to_hub():
            logger.error("Failed to login to Hugging Face Hub")
            return False
        
        try:
            repo_id = self.create_repository()
            if not repo_id:
                return False
            
            model_card = self.create_model_card(training_info)
            model_card_path = Path(model_path) / "README.md"
            with open(model_card_path, 'w', encoding='utf-8') as f:
                f.write(model_card)
            
            training_info_path = Path(model_path) / "training_info.json"
            with open(training_info_path, 'w', encoding='utf-8') as f:
                json.dump(training_info, f, ensure_ascii=False, indent=2)
            
            logger.info(f"Uploading model to {repo_id}...")
            upload_folder(
                folder_path=model_path,
                repo_id=repo_id,
                commit_message=self.config.commit_message,
                ignore_patterns=["*.log", "__pycache__", "*.pyc"]
            )
            logger.info(f"✅ Model uploaded to: https://huggingface.co/{repo_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to upload model: {e}")
            return False

class LegalQATrainer:
    def __init__(self, config: FineTuningConfig):
        self.config = config
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        self.model = None
        self.trainer = None
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
    
    def load_model(self):
        """Load PhoBERT model for question answering."""
        logger.info(f"Loading model: {self.config.model_name}")
        self.model = AutoModelForQuestionAnswering.from_pretrained(
            self.config.model_name,
            torch_dtype=torch.float32,
        )
        self.model.resize_token_embeddings(len(self.tokenizer))
        logger.info(f"Model max_position_embeddings: {self.model.config.max_position_embeddings}")
        logger.info("Model loaded successfully!")
    
    def compute_metrics(self, eval_pred):
        """Compute metrics for evaluation."""
        predictions, labels = eval_pred
        start_predictions, end_predictions = predictions
        start_labels, end_labels = labels
        
        start_accuracy = accuracy_score(start_labels, np.argmax(start_predictions, axis=1))
        end_accuracy = accuracy_score(end_labels, np.argmax(end_predictions, axis=1))
        start_f1 = f1_score(start_labels, np.argmax(start_predictions, axis=1), average='macro')
        end_f1 = f1_score(end_labels, np.argmax(end_predictions, axis=1), average='macro')
        
        return {
            "accuracy": (start_accuracy + end_accuracy) / 2,
            "f1": (start_f1 + end_f1) / 2,
            "start_accuracy": start_accuracy,
            "end_accuracy": end_accuracy,
            "start_f1": start_f1,
            "end_f1": end_f1
        }
    
    def setup_wandb(self):
        """Setup Weights & Biases integration."""
        if self.config.report_to == "wandb":
            try:
                # Thử các cách khác nhau để login vào wandb
                if self.config.wandb_api_key:
                    # Cách 1: Sử dụng API key trực tiếp
                    wandb.login(key=self.config.wandb_api_key)
                    logger.info("✅ Wandb logged in successfully with provided API key")
                elif os.getenv("WANDB_API_KEY"):
                    # Cách 2: Sử dụng environment variable
                    wandb.login(key=os.getenv("WANDB_API_KEY"))
                    logger.info("✅ Wandb logged in successfully with environment variable")
                else:
                    # Cách 3: Thử login interactive (sẽ prompt cho API key)
                    try:
                        wandb.login()
                        logger.info("✅ Wandb logged in interactively")
                    except Exception as e:
                        logger.warning(f"Interactive wandb login failed: {e}")
                        logger.info("Disabling wandb logging...")
                        self.config.report_to = None
                        return False
                
                # Initialize wandb run
                wandb.init(
                    project=self.config.wandb_project,
                    name=self.config.run_name,
                    config=self.config.__dict__
                )
                logger.info(f"✅ Wandb initialized for project: {self.config.wandb_project}")
                return True
                
            except Exception as e:
                logger.error(f"Failed to setup wandb: {e}")
                logger.info("Disabling wandb logging...")
                self.config.report_to = None
                return False
        return False
    
    def setup_training(self, datasets: DatasetDict):
        """Setup training arguments and trainer."""
        training_args = TrainingArguments(
            output_dir=self.config.model_output_dir,
            overwrite_output_dir=True,
            num_train_epochs=self.config.num_train_epochs,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            per_device_eval_batch_size=self.config.per_device_eval_batch_size,
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
            learning_rate=self.config.learning_rate,
            weight_decay=self.config.weight_decay,
            warmup_ratio=self.config.warmup_ratio,
            fp16=self.config.fp16,
            dataloader_num_workers=self.config.dataloader_num_workers,
            evaluation_strategy=self.config.evaluation_strategy,
            eval_steps=self.config.eval_steps,
            save_steps=self.config.save_steps,
            save_total_limit=self.config.save_total_limit,
            load_best_model_at_end=self.config.load_best_model_at_end,
            metric_for_best_model=self.config.metric_for_best_model,
            greater_is_better=self.config.greater_is_better,
            logging_steps=self.config.logging_steps,
            report_to=self.config.report_to,
            run_name=self.config.run_name,
            seed=self.config.random_state,
            push_to_hub=False  # Handled by HuggingFaceUploader
        )
        
        data_collator = DataCollatorWithPadding(
            tokenizer=self.tokenizer,
            pad_to_multiple_of=8 if self.config.fp16 else None
        )
        
        self.trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=datasets["train"],
            eval_dataset=datasets["validation"],
            tokenizer=self.tokenizer,
            data_collator=data_collator,
            compute_metrics=self.compute_metrics,
            callbacks=[
                EarlyStoppingCallback(
                    early_stopping_patience=self.config.early_stopping_patience,
                    early_stopping_threshold=self.config.early_stopping_threshold
                )
            ]
        )
        logger.info("Training setup completed!")
    
    def train(self):
        """Start training."""
        logger.info("Starting training...")
        
        # Setup wandb if needed
        wandb_setup_success = self.setup_wandb()
        
        train_result = self.trainer.train()
        
        logger.info("Saving final model...")
        self.trainer.save_model()
        self.tokenizer.save_pretrained(self.config.model_output_dir)
        
        logger.info("Running final evaluation...")
        eval_result = self.trainer.evaluate()
        logger.info(f"Training completed! Final loss: {train_result.training_loss}, Eval: {eval_result}")
        
        # Finish wandb run if it was setup successfully
        if wandb_setup_success:
            wandb.finish()
        
        return train_result, eval_result


def main():
    """Fine-tune PhoBERT model for Vietnamese Legal QA and optionally upload to Hugging Face Hub."""
    config = FineTuningConfig(
        model_name="vinai/phobert-base",
        model_output_dir="phobert-legal-qa-finetuned",
        data_file="phobert_training_data/qa_pairs.jsonl",
        learning_rate=2e-5,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=4,
        fp16=True,
        evaluation_strategy="steps",
        eval_steps=200,
        save_steps=200,
        early_stopping_patience=3,
        max_length=256,
        test_size=0.15,
        logging_steps=50,
        report_to="wandb",  # Enable wandb logging
        wandb_api_key="a365649659a3eec4d90673a8e42f6a4c53604751",  # Thêm API key của bạn vào đây
        wandb_project="phobert-legal-qa",
        push_to_hub=True,
        hf_token=os.getenv('HF_TOKEN'),
        hf_username="huynguyen251",
        hf_repo_name="phobert-legal-qa-v2",
        hub_model_id="huynguyen251/phobert-legal-qa-v2",
        hub_private_repo=False,
        commit_message="Fine-tuned PhoBERT for Vietnamese Legal QA - Updated Dataset"
    )
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info(f"Using device: {device}")
    if torch.cuda.is_available():
        logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
        logger.info(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    
    output_dir = Path(config.model_output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if not output_dir.is_dir() or not os.access(output_dir, os.W_OK):
        logger.error(f"Output directory {config.model_output_dir} is not writable!")
        return
    
    data_file = Path(config.data_file)
    if not data_file.exists():
        logger.error(f"Data file {config.data_file} does not exist!")
        return
    
    try:
        logger.info("Loading and preprocessing data...")
        data_processor = LegalQADataProcessor(config)
        qa_pairs = data_processor.load_qa_data()
        if not qa_pairs:
            logger.error("No QA data found! Check data file for valid JSONL data.")
            return
        
        processed_qa_pairs = data_processor.preprocess_qa_data(qa_pairs)
        datasets = data_processor.prepare_datasets(processed_qa_pairs)
        if not datasets["train"] or not datasets["validation"]:
            logger.error("Empty training or validation dataset!")
            return
        
        logger.info("Sample QA pairs:")
        for i, sample in enumerate(processed_qa_pairs[:3]):
            logger.info(f"Sample {i+1}:")
            logger.info(f"  Question: {sample.get('question', '')[:100]}...")
            logger.info(f"  Context: {sample.get('context', '')[:100]}...")
            logger.info(f"  Answer: {sample.get('answer', {}).get('text', [''])[0][:100]}...")
        
        logger.info("Setting up model and training...")
        trainer = LegalQATrainer(config)
        trainer.load_model()
        trainer.setup_training(datasets)
        train_result, eval_metrics = trainer.train()
        
        output_dir = Path(config.model_output_dir)
        with open(output_dir / "eval_metrics.json", "w", encoding="utf-8") as f:
            json.dump(eval_metrics, f, ensure_ascii=False, indent=2)
        
        logger.info(f"✅ Training complete. Model saved to {config.model_output_dir}")
        
        if config.push_to_hub:
            logger.info("Uploading model to Hugging Face Hub...")
            uploader = HuggingFaceUploader(config)
            training_info = {
                "eval_result": eval_metrics,
                "train_result": {"training_loss": train_result.training_loss},
                "dataset_info": {
                    "total_qa_pairs": len(qa_pairs),
                    "train_size": len(datasets["train"]),
                    "validation_size": len(datasets["validation"]),
                    "categories": list(set([item["category"] for item in qa_pairs]))
                },
                "transformers_version": transformers.__version__
            }
            uploader.upload_model(config.model_output_dir, training_info)
    
    except Exception as e:
        logger.error(f"❌ Fine-tuning failed: {e}")
        raise

if __name__ == "__main__":
    main()

2025-06-01 21:07:56,693 - INFO - Using device: cuda
2025-06-01 21:07:56,694 - INFO - GPU: NVIDIA GeForce RTX 3060
2025-06-01 21:07:56,697 - INFO - GPU Memory: 12.9 GB
2025-06-01 21:07:56,698 - INFO - Loading and preprocessing data...


Transformers version: 4.44.2
Transformers location: d:\Source\AVG\VN-Law\chat-service\env\lib\site-packages\transformers\__init__.py




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

Loading QA pairs: 100%|██████████| 156349/156349 [00:04<00:00, 34728.81it/s]
2025-06-01 21:08:03,954 - INFO - Loaded 156349 QA pairs from phobert_training_data\qa_pairs.jsonl


[A[A

[A[A

[A[A

  0%|          | 0/18087 [15:44<?, ?it/s]
Preprocessing QA pairs: 100%|██████████| 156349/156349 [00:01<00:00, 136797.19it/s]
2025-06-01 21:08:05,115 - INFO - Processed 113497 valid QA pairs (skipped 42852)
2025-06-01 21:08:05,212 - INFO - Train size: 96472, Validation size: 17025
2025-06-01 21:08:05,332 - INFO - Training set categories: Công nghiệp: 1087, Thuế, phí, lệ phí, các khoản thu khác: 705, Đất đai: 14565, Dân số, gia đình, trẻ em, bình đẳng giới: 

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
eval/accuracy,▁▇█
eval/end_accuracy,▁██
eval/end_f1,▁██
eval/f1,▁▇█
eval/loss,█▁▁
eval/runtime,▆█▁
eval/samples_per_second,▃▁█
eval/start_accuracy,▁▅█
eval/start_f1,▁▂█
eval/steps_per_second,▃▁█

0,1
eval/accuracy,0.95427
eval/end_accuracy,0.93463
eval/end_f1,0.86916
eval/f1,0.4573
eval/loss,0.12097
eval/runtime,96.5225
eval/samples_per_second,176.384
eval/start_accuracy,0.97392
eval/start_f1,0.04544
eval/steps_per_second,22.057


2025-06-01 21:09:28,341 - INFO - ✅ Wandb initialized for project: phobert-legal-qa
                                         

[A[A                                               
  0%|          | 0/18087 [29:16<?, ?it/s]           
[A

{'loss': 5.4244, 'grad_norm': 8.954318046569824, 'learning_rate': 5.417357656163627e-07, 'epoch': 0.01}


                                         

[A[A                                               
  0%|          | 0/18087 [29:49<?, ?it/s]            
[A

{'loss': 5.1885, 'grad_norm': 6.557638168334961, 'learning_rate': 1.0834715312327253e-06, 'epoch': 0.02}


                                         

[A[A                                               
  0%|          | 0/18087 [30:20<?, ?it/s]            
[A

{'loss': 4.6824, 'grad_norm': 6.624057769775391, 'learning_rate': 1.6141514648977336e-06, 'epoch': 0.02}


                                         

[A[A                                               
  0%|          | 0/18087 [30:55<?, ?it/s]            
[A

{'loss': 3.9743, 'grad_norm': 6.447659015655518, 'learning_rate': 2.1669430624654506e-06, 'epoch': 0.03}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 3.2860655784606934, 'eval_accuracy': 0.5348311306901615, 'eval_f1': 0.10898462727065836, 'eval_start_accuracy': 0.7788546255506608, 'eval_end_accuracy': 0.29080763582966224, 'eval_start_f1': 0.012591056715843358, 'eval_end_f1': 0.20537819782547337, 'eval_runtime': 127.3249, 'eval_samples_per_second': 133.713, 'eval_steps_per_second': 16.721, 'epoch': 0.03}


                                         

[A[A                                               
  0%|          | 0/18087 [33:41<?, ?it/s]            
[A

{'loss': 3.0621, 'grad_norm': 5.946743965148926, 'learning_rate': 2.7197346600331676e-06, 'epoch': 0.04}


                                         

[A[A                                               
  0%|          | 0/18087 [34:17<?, ?it/s]            
[A

{'loss': 1.9122, 'grad_norm': 7.603392601013184, 'learning_rate': 3.272526257600885e-06, 'epoch': 0.05}


                                         

[A[A                                               
  0%|          | 0/18087 [34:49<?, ?it/s]            
[A

{'loss': 0.8663, 'grad_norm': 4.718493461608887, 'learning_rate': 3.825317855168602e-06, 'epoch': 0.06}


                                         

[A[A                                               
  0%|          | 0/18087 [35:25<?, ?it/s]            
[A

{'loss': 0.4477, 'grad_norm': 1.8805421590805054, 'learning_rate': 4.367053620784965e-06, 'epoch': 0.07}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.23289425671100616, 'eval_accuracy': 0.9048458149779737, 'eval_f1': 0.424233749861785, 'eval_start_accuracy': 0.8920998531571219, 'eval_end_accuracy': 0.9175917767988253, 'eval_start_f1': 0.018134102572062404, 'eval_end_f1': 0.8303333971515077, 'eval_runtime': 129.7527, 'eval_samples_per_second': 131.211, 'eval_steps_per_second': 16.408, 'epoch': 0.07}


                                         

[A[A                                               
  0%|          | 0/18087 [38:20<?, ?it/s]            
[A

{'loss': 0.2926, 'grad_norm': 1.6680586338043213, 'learning_rate': 4.919845218352681e-06, 'epoch': 0.07}


                                         

[A[A                                               
  0%|          | 0/18087 [38:58<?, ?it/s]            
[A

{'loss': 0.2317, 'grad_norm': 1.3913531303405762, 'learning_rate': 5.472636815920398e-06, 'epoch': 0.08}


                                         

[A[A                                               
  0%|          | 0/18087 [39:37<?, ?it/s]            
[A

{'loss': 0.2013, 'grad_norm': 0.8732656836509705, 'learning_rate': 6.025428413488116e-06, 'epoch': 0.09}


                                         

[A[A                                               
  0%|          | 0/18087 [40:17<?, ?it/s]            
[A

{'loss': 0.1501, 'grad_norm': 0.4801824986934662, 'learning_rate': 6.578220011055833e-06, 'epoch': 0.1}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.10375536233186722, 'eval_accuracy': 0.9687224669603525, 'eval_f1': 0.47707631709814124, 'eval_start_accuracy': 0.9753891336270191, 'eval_end_accuracy': 0.9620558002936858, 'eval_start_f1': 0.051241151469230285, 'eval_end_f1': 0.9029114827270522, 'eval_runtime': 115.3633, 'eval_samples_per_second': 147.577, 'eval_steps_per_second': 18.455, 'epoch': 0.1}


                                         

[A[A                                               
  0%|          | 0/18087 [42:45<?, ?it/s]            
[A

{'loss': 0.147, 'grad_norm': 3.085040807723999, 'learning_rate': 7.131011608623549e-06, 'epoch': 0.11}


                                         

[A[A                                               
  0%|          | 0/18087 [43:14<?, ?it/s]            
[A

{'loss': 0.1356, 'grad_norm': 2.0271573066711426, 'learning_rate': 7.672747374239912e-06, 'epoch': 0.12}


                                         

[A[A                                               
  0%|          | 0/18087 [43:43<?, ?it/s]            
[A

{'loss': 0.1604, 'grad_norm': 0.833656370639801, 'learning_rate': 8.22553897180763e-06, 'epoch': 0.12}


                                         

[A[A                                               
  0%|          | 0/18087 [44:02<?, ?it/s]            
[A

{'loss': 0.2731, 'grad_norm': 4.890761375427246, 'learning_rate': 8.778330569375346e-06, 'epoch': 0.13}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.1122935563325882, 'eval_accuracy': 0.9534801762114538, 'eval_f1': 0.49608082825261623, 'eval_start_accuracy': 0.9327459618208517, 'eval_end_accuracy': 0.9742143906020558, 'eval_start_f1': 0.06000273165728297, 'eval_end_f1': 0.9321589248479495, 'eval_runtime': 133.8336, 'eval_samples_per_second': 127.21, 'eval_steps_per_second': 15.908, 'epoch': 0.13}


                                         

[A[A                                               
  0%|          | 0/18087 [46:58<?, ?it/s]            
[A

{'loss': 0.1276, 'grad_norm': 1.1853266954421997, 'learning_rate': 9.331122166943063e-06, 'epoch': 0.14}


                                         

[A[A                                               
  0%|          | 0/18087 [47:35<?, ?it/s]            
[A

{'loss': 0.0954, 'grad_norm': 7.255343437194824, 'learning_rate': 9.88391376451078e-06, 'epoch': 0.15}


                                         

[A[A                                               
  0%|          | 0/18087 [48:09<?, ?it/s]            
[A

{'loss': 0.1081, 'grad_norm': 0.5000291466712952, 'learning_rate': 1.0436705362078497e-05, 'epoch': 0.16}


                                         

[A[A                                               
  0%|          | 0/18087 [48:47<?, ?it/s]             
[A

{'loss': 0.1257, 'grad_norm': 0.3002016544342041, 'learning_rate': 1.0989496959646216e-05, 'epoch': 0.17}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.08298086374998093, 'eval_accuracy': 0.9772980910425844, 'eval_f1': 0.5088194880993329, 'eval_start_accuracy': 0.9776211453744493, 'eval_end_accuracy': 0.9769750367107195, 'eval_start_f1': 0.07546950984664719, 'eval_end_f1': 0.9421694663520186, 'eval_runtime': 127.1174, 'eval_samples_per_second': 133.931, 'eval_steps_per_second': 16.748, 'epoch': 0.17}


                                         

[A[A                                               
  0%|          | 0/18087 [51:32<?, ?it/s]             
[A

{'loss': 0.0988, 'grad_norm': 1.4803558588027954, 'learning_rate': 1.1542288557213931e-05, 'epoch': 0.17}


                                         

[A[A                                               
  0%|          | 0/18087 [52:11<?, ?it/s]             
[A

{'loss': 0.1159, 'grad_norm': 5.256414413452148, 'learning_rate': 1.2095080154781648e-05, 'epoch': 0.18}


                                         

[A[A                                               
  0%|          | 0/18087 [52:47<?, ?it/s]             
[A

{'loss': 0.1119, 'grad_norm': 2.2532193660736084, 'learning_rate': 1.2647871752349365e-05, 'epoch': 0.19}


                                         

[A[A                                               
  0%|          | 0/18087 [53:23<?, ?it/s]             
[A

{'loss': 0.0801, 'grad_norm': 1.1581368446350098, 'learning_rate': 1.3200663349917082e-05, 'epoch': 0.2}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.06253915280103683, 'eval_accuracy': 0.9774743024963289, 'eval_f1': 0.5138705881207118, 'eval_start_accuracy': 0.9776211453744493, 'eval_end_accuracy': 0.9773274596182085, 'eval_start_f1': 0.0754751037369228, 'eval_end_f1': 0.9522660725045008, 'eval_runtime': 129.6143, 'eval_samples_per_second': 131.351, 'eval_steps_per_second': 16.426, 'epoch': 0.2}


                                         

[A[A                                               
  0%|          | 0/18087 [56:14<?, ?it/s]             
[A

{'loss': 0.1058, 'grad_norm': 3.3523240089416504, 'learning_rate': 1.3742399115533445e-05, 'epoch': 0.21}


                                         

[A[A                                               
  0%|          | 0/18087 [56:52<?, ?it/s]             
[A

{'loss': 0.1002, 'grad_norm': 0.09368986636400223, 'learning_rate': 1.429519071310116e-05, 'epoch': 0.22}


                                         

[A[A                                               
  0%|          | 0/18087 [57:29<?, ?it/s]             
[A

{'loss': 0.0563, 'grad_norm': 2.6880834102630615, 'learning_rate': 1.4847982310668878e-05, 'epoch': 0.22}


                                         

[A[A                                               
  0%|          | 0/18087 [58:10<?, ?it/s]             
[A

{'loss': 0.0973, 'grad_norm': 10.309085845947266, 'learning_rate': 1.5400773908236596e-05, 'epoch': 0.23}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.0639370009303093, 'eval_accuracy': 0.9787371512481644, 'eval_f1': 0.5438896216202269, 'eval_start_accuracy': 0.9792657856093979, 'eval_end_accuracy': 0.978208516886931, 'eval_start_f1': 0.13929932094234762, 'eval_end_f1': 0.9484799222981063, 'eval_runtime': 128.1258, 'eval_samples_per_second': 132.877, 'eval_steps_per_second': 16.616, 'epoch': 0.23}


                                           

[A[A                                               
  0%|          | 0/18087 [1:00:59<?, ?it/s]           
[A

{'loss': 0.0724, 'grad_norm': 0.07747649401426315, 'learning_rate': 1.5953565505804315e-05, 'epoch': 0.24}


                                           

[A[A                                               
  0%|          | 0/18087 [1:01:39<?, ?it/s]           
[A

{'loss': 0.0928, 'grad_norm': 0.076473668217659, 'learning_rate': 1.650635710337203e-05, 'epoch': 0.25}


                                           

[A[A                                               
  0%|          | 0/18087 [1:02:20<?, ?it/s]           
[A

{'loss': 0.085, 'grad_norm': 0.18258516490459442, 'learning_rate': 1.7059148700939746e-05, 'epoch': 0.26}


                                           

[A[A                                               
  0%|          | 0/18087 [1:02:56<?, ?it/s]           
[A

{'loss': 0.1107, 'grad_norm': 2.0234451293945312, 'learning_rate': 1.7611940298507464e-05, 'epoch': 0.27}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.06893135607242584, 'eval_accuracy': 0.9774449339207049, 'eval_f1': 0.5144663543275407, 'eval_start_accuracy': 0.9775624082232012, 'eval_end_accuracy': 0.9773274596182085, 'eval_start_f1': 0.07405042404986113, 'eval_end_f1': 0.9548822846052203, 'eval_runtime': 137.3173, 'eval_samples_per_second': 123.983, 'eval_steps_per_second': 15.504, 'epoch': 0.27}


                                           

[A[A                                               
  0%|          | 0/18087 [1:05:54<?, ?it/s]           
[A

{'loss': 0.069, 'grad_norm': 2.9243485927581787, 'learning_rate': 1.816473189607518e-05, 'epoch': 0.27}


                                           

[A[A                                               
  0%|          | 0/18087 [1:06:30<?, ?it/s]           
[A

{'loss': 0.0815, 'grad_norm': 0.05162263661623001, 'learning_rate': 1.87175234936429e-05, 'epoch': 0.28}


                                           

[A[A                                               
  0%|          | 0/18087 [1:07:05<?, ?it/s]           
[A

{'loss': 0.0741, 'grad_norm': 0.12183202058076859, 'learning_rate': 1.9270315091210617e-05, 'epoch': 0.29}


                                           

[A[A                                               
  0%|          | 0/18087 [1:07:43<?, ?it/s]           
[A

{'loss': 0.0819, 'grad_norm': 3.247403621673584, 'learning_rate': 1.9823106688778332e-05, 'epoch': 0.3}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.07215487957000732, 'eval_accuracy': 0.9795007342143907, 'eval_f1': 0.602910749664121, 'eval_start_accuracy': 0.979618208516887, 'eval_end_accuracy': 0.9793832599118942, 'eval_start_f1': 0.25102382987815675, 'eval_end_f1': 0.9547976694500852, 'eval_runtime': 135.7664, 'eval_samples_per_second': 125.399, 'eval_steps_per_second': 15.681, 'epoch': 0.3}


                                           

[A[A                                               
  0%|          | 0/18087 [1:10:44<?, ?it/s]           
[A

{'loss': 0.0641, 'grad_norm': 0.042116910219192505, 'learning_rate': 1.9958225826268585e-05, 'epoch': 0.31}


                                           

[A[A                                               
  0%|          | 0/18087 [1:11:20<?, ?it/s]           
[A

{'loss': 0.1036, 'grad_norm': 0.36534908413887024, 'learning_rate': 1.989679321784003e-05, 'epoch': 0.32}


                                           

[A[A                                               
  0%|          | 0/18087 [1:11:58<?, ?it/s]           
[A

{'loss': 0.0751, 'grad_norm': 0.2260214239358902, 'learning_rate': 1.9835360609411478e-05, 'epoch': 0.32}


                                           

[A[A                                               
  0%|          | 0/18087 [1:12:38<?, ?it/s]           
[A

{'loss': 0.0632, 'grad_norm': 0.03621504455804825, 'learning_rate': 1.977392800098292e-05, 'epoch': 0.33}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.058555059134960175, 'eval_accuracy': 0.9779148311306902, 'eval_f1': 0.5095598212540562, 'eval_start_accuracy': 0.977856093979442, 'eval_end_accuracy': 0.9779735682819384, 'eval_start_f1': 0.08676181546376394, 'eval_end_f1': 0.9323578270443484, 'eval_runtime': 136.5016, 'eval_samples_per_second': 124.724, 'eval_steps_per_second': 15.597, 'epoch': 0.33}


                                           

[A[A                                               
  0%|          | 0/18087 [1:15:35<?, ?it/s]           
[A

{'loss': 0.0833, 'grad_norm': 0.4064314365386963, 'learning_rate': 1.9712495392554368e-05, 'epoch': 0.34}


                                           

[A[A                                               
  0%|          | 0/18087 [1:16:17<?, ?it/s]           
[A

{'loss': 0.1415, 'grad_norm': 1.703637957572937, 'learning_rate': 1.9651062784125818e-05, 'epoch': 0.35}


                                           

[A[A                                               
  0%|          | 0/18087 [1:16:59<?, ?it/s]           
[A

{'loss': 0.2726, 'grad_norm': 0.7372889518737793, 'learning_rate': 1.958963017569726e-05, 'epoch': 0.36}


                                           

[A[A                                                 
  0%|          | 0/18087 [1:17:34<?, ?it/s]           
[A

{'loss': 0.137, 'grad_norm': 0.28392699360847473, 'learning_rate': 1.9528197567268707e-05, 'epoch': 0.36}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.05977020785212517, 'eval_accuracy': 0.9787958883994126, 'eval_f1': 0.5319231158110048, 'eval_start_accuracy': 0.9790308370044053, 'eval_end_accuracy': 0.9785609397944199, 'eval_start_f1': 0.11335628093325875, 'eval_end_f1': 0.9504899506887509, 'eval_runtime': 136.9823, 'eval_samples_per_second': 124.286, 'eval_steps_per_second': 15.542, 'epoch': 0.36}


                                           

[A[A                                                 
  0%|          | 0/18087 [1:20:39<?, ?it/s]           
[A

{'loss': 0.1661, 'grad_norm': 1.4994114637374878, 'learning_rate': 1.9466764958840154e-05, 'epoch': 0.37}


                                           

[A[A                                                 
  0%|          | 0/18087 [1:21:17<?, ?it/s]           
[A

{'loss': 0.1175, 'grad_norm': 0.035565998405218124, 'learning_rate': 1.94053323504116e-05, 'epoch': 0.38}


                                           

[A[A                                                 
  0%|          | 0/18087 [1:21:53<?, ?it/s]           
[A

{'loss': 0.0934, 'grad_norm': 0.5697016716003418, 'learning_rate': 1.9343899741983044e-05, 'epoch': 0.39}


                                           

[A[A                                                 
  0%|          | 0/18087 [1:22:33<?, ?it/s]           
[A

{'loss': 0.0968, 'grad_norm': 5.968558311462402, 'learning_rate': 1.9282467133554494e-05, 'epoch': 0.4}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

{'eval_loss': 0.06477497518062592, 'eval_accuracy': 0.9804698972099853, 'eval_f1': 0.5888859332600402, 'eval_start_accuracy': 0.9806754772393539, 'eval_end_accuracy': 0.9802643171806168, 'eval_start_f1': 0.21895315890119804, 'eval_end_f1': 0.9588187076188823, 'eval_runtime': 140.1122, 'eval_samples_per_second': 121.51, 'eval_steps_per_second': 15.195, 'epoch': 0.4}


                                           

[A[A                                                 
  0%|          | 0/18087 [1:24:58<?, ?it/s]           
 13%|█▎        | 2400/18087 [56:31<6:09:28,  1.41s/it]
2025-06-01 22:06:00,572 - INFO - Saving final model...


{'train_runtime': 3391.597, 'train_samples_per_second': 85.333, 'train_steps_per_second': 5.333, 'train_loss': 0.6344684727986654, 'epoch': 0.4}


2025-06-01 22:06:01,700 - INFO - Running final evaluation...
100%|██████████| 2129/2129 [02:02<00:00, 17.41it/s]
2025-06-01 22:08:14,432 - INFO - Training completed! Final loss: 0.6344684727986654, Eval: {'eval_loss': 0.07215487957000732, 'eval_accuracy': 0.9795007342143907, 'eval_f1': 0.602910749664121, 'eval_start_accuracy': 0.979618208516887, 'eval_end_accuracy': 0.9793832599118942, 'eval_start_f1': 0.25102382987815675, 'eval_end_f1': 0.9547976694500852, 'eval_runtime': 132.7064, 'eval_samples_per_second': 128.291, 'eval_steps_per_second': 16.043, 'epoch': 0.39804295546894436}


0,1
eval/accuracy,▁▇███████████
eval/end_accuracy,▁▇███████████
eval/end_f1,▁▇▇██████████
eval/f1,▁▅▆▆▇▇▇▇█▇▇██
eval/loss,█▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▄▅▁▆▄▅▅▇▇▇▇█▆
eval/samples_per_second,▄▄█▃▄▄▄▂▂▂▂▁▃
eval/start_accuracy,▁▅█▆█████████
eval/start_f1,▁▁▂▂▃▃▅▃█▃▄▇█
eval/steps_per_second,▄▄█▃▄▄▄▂▂▂▂▁▃

0,1
eval/accuracy,0.9795
eval/end_accuracy,0.97938
eval/end_f1,0.9548
eval/f1,0.60291
eval/loss,0.07215
eval/runtime,132.7064
eval/samples_per_second,128.291
eval/start_accuracy,0.97962
eval/start_f1,0.25102
eval/steps_per_second,16.043


2025-06-01 22:08:17,390 - INFO - ✅ Training complete. Model saved to phobert-legal-qa-finetuned
2025-06-01 22:08:17,390 - INFO - Uploading model to Hugging Face Hub...
2025-06-01 22:08:17,405 - INFO - Logging in to Hugging Face Hub...
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
2025-06-01 22:08:17,993 - INFO - Creating repository: huynguyen251/phobert-legal-qa-v2
2025-06-01 22:08:18,501 - INFO - Repository created/verified: huynguyen251/phobert-legal-qa-v2
2025-06-01 22:08:18,501 - INFO - Uploading model to huynguyen251/phobert-legal-qa-v2...


[A[A





[A[A[A[A[A[A



[A[A[A[A




[A[A[A[A[A


[A[A[A



[A[A[A[A





[A[A[A[A[A[A


[A[A[A

[A[A

[A[A

training_args.bin: 100%|██████████| 5.24k/5.24k [00:00<00:00, 10.2kB/s]
scheduler.pt: 100%|██████████| 1.06k/1.06k [00:00<00:00, 2.03kB/s]


rng_state.pth: 100%|██████████| 14.2k/14.2k [00:00<00:00, 21.6kB/s]


[A[

In [27]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
import torch.nn.functional as F

# Load model và tokenizer
model_name = "huynguyen251/phobert-legal-qa-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Câu hỏi và đoạn văn bản (context)
question = "Thủ tục cần thiết là gì?"
context = """
Luật này quy định về quyền, nghĩa vụ và trách nhiệm của thanh niên; chính sách của Nhà nước đối với thanh niên; trách nhiệm của cơ quan, tổ chức thanh niên, tổ chức khác, cơ sở giáo dục, gia đình và cá nhân đối với thanh niên; quản lý nhà nước về thanh niên.
Luật này áp dụng cho công dân Việt Nam từ đủ 16 tuổi đến 30 tuổi, bao gồm cả thanh niên trong nước và người Việt Nam định cư ở nước ngoài.
"""

# {"question": "Thủ tục cần thiết là gì?", "context": "Luật này quy định về quyền, nghĩa vụ và trách nhiệm của thanh niên; chính sách của Nhà nước đối với thanh niên; trách nhiệm của cơ quan, tổ chức thanh niên, tổ chức khác, cơ sở giáo dục, gia đình và cá nhân đối với thanh niên; quản lý nhà nước về thanh niê", "answer": "Luật này quy định về quyền, nghĩa vụ và trách nhiệm của thanh niên; chính sách của Nhà nước đối với thanh niên; trách nhiệm của cơ quan, tổ chức thanh niên, tổ chức khác, cơ sở giáo dục, gia đình và cá nhân đối với thanh niên; quản lý nhà nước về thanh niê", "category": "Tổ chức chính trị - xã hội, hội"}


# Mã hóa input
inputs = tokenizer.encode_plus(question, context, return_tensors="pt", truncation=True)

# Dự đoán với model
with torch.no_grad():
    outputs = model(**inputs)
    start_scores = outputs.start_logits
    end_scores = outputs.end_logits

# Tính xác suất (softmax)
start_probs = F.softmax(start_scores, dim=-1)
end_probs = F.softmax(end_scores, dim=-1)

# Lấy chỉ số bắt đầu và kết thúc của câu trả lời
start_index = torch.argmax(start_probs)
end_index = torch.argmax(end_probs) + 1

# Tính độ tin cậy (confidence)
confidence = (start_probs[0][start_index] * end_probs[0][end_index - 1]).item()

# Giải mã câu trả lời
answer = tokenizer.convert_tokens_to_string(
    tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][start_index:end_index])
)

# Hiển thị kết quả
print(f"Question: {question}")
if confidence < 0.5:
    print("⚠️ Confidence is low, answer may not be reliable.")
else:
    print("✅ Confidence is high, answer is likely reliable.")
    print(f"Answer: {answer}")

print(f"Confidence: {confidence:.4f}")

# Đánh giá câu trả lời có đáng tin hay không
if confidence < 0.5 or answer.strip() == "":
    print("⚠️ Không tìm thấy câu trả lời phù hợp trong ngữ cảnh.")


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Question: Thủ tục cần thiết là gì?
✅ Confidence is high, answer is likely reliable.
Answer: </s> Luật này quy định về quyền, nghĩa vụ và trách nhiệm của thanh niên; chính sách của Nhà nước đối với thanh niên; trách nhiệm của cơ quan, tổ chức thanh niên, tổ chức khác, cơ sở giáo dục, gia đình và cá nhân đối với thanh niên; quản lý nhà nước về thanh niên.<unk> Luật này áp dụng cho công dân Việt Nam từ đủ 16 tuổi đến 30 tuổi, bao gồm cả thanh niên trong nước và người Việt Nam định cư ở nước ngoài.@@
Confidence: 0.9893
