In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dataset-tmp-hunghung/dataset.json


In [None]:
import json
import torch
import numpy as np
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    TrainingArguments, 
    Trainer,
    DataCollatorForLanguageModeling,
    EarlyStoppingCallback
)
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
import pandas as pd
from sklearn.model_selection import KFold
from typing import List, Dict, Tuple
import os
from datetime import datetime
import math

class PhysicsChatbotTrainerKFold:
    def __init__(self, model_name="Qwen/Qwen2.5-1.5B-Instruct"):
        """
        Class để fine-tune Qwen3 cho chatbot vật lý với cải thiện overfitting
        """
        self.model_name = model_name
        self.tokenizer = None
        self.base_model = None
        self.fold_results = []
        
        # Set device explicitly
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        
    def setup_tokenizer(self):
        """Khởi tạo tokenizer với physics-specific tokens"""
        print(f"Đang tải tokenizer từ {self.model_name}...")
        
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name,
            trust_remote_code=True,
            padding_side="right"
        )
        
        # Thêm special tokens cho vật lý
        physics_tokens = [
            "<|physics_problem|>", "<|solution|>", "<|step|>", 
            "<|formula|>", "<|unit|>", "<|answer|>"
        ]
        
        self.tokenizer.add_special_tokens({
            "additional_special_tokens": physics_tokens
        })
        
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
            
        print("✅ Đã tải tokenizer và thêm physics tokens!")
        return self.tokenizer
    
    def load_base_model(self):
        """Load base model với cấu hình tối ưu cho physics"""
        print(f"Đang tải base model {self.model_name}...")
        
        model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map={"": 0},
            trust_remote_code=True,
            # Thêm dropout để giảm overfitting
            attention_dropout=0.1,
            hidden_dropout=0.1,
        )
        
        # Resize embeddings cho physics tokens
        model.resize_token_embeddings(len(self.tokenizer))
        
        model.train()
        model.enable_input_require_grads()
        
        print(f"✅ Đã tải model thành công!")
        return model
    
    def create_physics_prompt_template(self, conversation):
        """Tạo template chuyên biệt cho bài tập vật lý"""
        physics_template = ""
        
        for message in conversation:
            role = message.get('role', '')
            content = message.get('content', '')
            
            if role == 'system':
                physics_template += f"<|im_start|>system\n{content}<|im_end|>\n"
            elif role == 'user':
                # Phân tích nếu là bài tập vật lý
                if any(keyword in content.lower() for keyword in [
                    'tính', 'tìm', 'xác định', 'bài tập', 'câu hỏi',
                    'vận tốc', 'gia tốc', 'lực', 'năng lượng', 'động lượng'
                ]):
                    physics_template += f"<|im_start|>user\n<|physics_problem|>{content}<|im_end|>\n"
                else:
                    physics_template += f"<|im_start|>user\n{content}<|im_end|>\n"
            elif role == 'assistant':
                # Cấu trúc lời giải vật lý
                physics_template += f"<|im_start|>assistant\n<|solution|>{content}<|im_end|>\n"
        
        return physics_template
    
    def apply_physics_chat_template(self, conversations):
        """Áp dụng template chuyên biệt cho vật lý"""
        formatted_data = []
        
        for conv in conversations:
            try:
                # Thử dùng template có sẵn trước
                formatted_text = self.tokenizer.apply_chat_template(
                    conv, 
                    tokenize=False,
                    add_generation_prompt=False
                )
                
                formatted_data.append({
                    'text': formatted_text,
                    'messages': conv
                })
            except Exception as e:
                # Fallback về physics template
                physics_text = self.create_physics_prompt_template(conv)
                formatted_data.append({
                    'text': physics_text,
                    'messages': conv
                })
        
        return formatted_data
    
    def prepare_fold_datasets(self, train_conversations, val_conversations):
        """Chuẩn bị datasets với augmentation cho vật lý"""
        print(f"Đang chuẩn bị datasets - Train: {len(train_conversations)}, Val: {len(val_conversations)}")
        
        # Apply physics-specific template
        train_formatted = self.apply_physics_chat_template(train_conversations)
        val_formatted = self.apply_physics_chat_template(val_conversations)
        
        # Data augmentation cho bài tập vật lý (optional)
        # train_formatted = self.augment_physics_data(train_formatted)
        
        train_dataset = Dataset.from_list(train_formatted)
        val_dataset = Dataset.from_list(val_formatted)
        
        return train_dataset, val_dataset
    
    def setup_lora_config_physics(self, model):
        """Thiết lập LoRA tối ưu cho physics chatbot"""
        model.train()
        
        if hasattr(model, "gradient_checkpointing_enable"):
            model.gradient_checkpointing_enable()
        
        # LoRA config được điều chỉnh để giảm overfitting
        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=False,
            r=8,  # Giảm rank để giảm overfitting
            lora_alpha=16,  # Giảm alpha
            lora_dropout=0.2,  # Tăng dropout
            target_modules=[
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj"
            ],
            bias="none"
        )
        
        model = get_peft_model(model, lora_config)
        model.train()
        model.print_trainable_parameters()
        
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        if trainable_params == 0:
            raise ValueError("No trainable parameters found!")
        
        return model
    
    def compute_physics_metrics(self, eval_pred):
        """Tính toán metrics phù hợp cho physics chatbot"""
        predictions, labels = eval_pred
        
        # Basic perplexity
        predictions = predictions.reshape(-1, predictions.shape[-1])
        labels = labels.reshape(-1)
        
        # Mask padding tokens
        mask = labels != -100
        
        if mask.sum() == 0:
            return {"perplexity": float('inf')}
        
        # Calculate perplexity
        predictions = torch.softmax(torch.tensor(predictions), dim=-1)
        labels_masked = labels[mask]
        predictions_masked = predictions[mask]
        
        # Get probabilities for true labels
        true_token_probs = predictions_masked[range(len(labels_masked)), labels_masked]
        
        # Calculate perplexity
        log_prob = torch.log(true_token_probs + 1e-10)
        perplexity = torch.exp(-log_prob.mean()).item()
        
        return {"perplexity": perplexity}
    
    def train_fold(self, fold_idx: int, train_dataset: Dataset, val_dataset: Dataset, 
                   output_dir: str, num_epochs: int = 2) -> Dict:
        """Training cho một fold với early stopping và regularization"""
        
        print(f"\n{'='*50}")
        print(f"🚀 TRAINING FOLD {fold_idx + 1}/5 - PHYSICS CHATBOT")
        print(f"{'='*50}")
        
        # Load model mới cho fold này
        model = self.load_base_model()
        
        # Setup LoRA
        print("Thiết lập LoRA cho physics...")
        model = self.setup_lora_config_physics(model)
        
        # Tokenize datasets
        print("Đang tokenize datasets...")
        
        def safe_tokenize_function(examples):
            try:
                texts = examples['text'] if isinstance(examples['text'], list) else [examples['text']]
                
                tokenized = self.tokenizer(
                    texts,
                    truncation=True,
                    padding="max_length",
                    max_length=1024,  # Giảm max_length để tiết kiệm memory
                    return_tensors=None
                )
                
                labels = []
                for input_ids in tokenized["input_ids"]:
                    if isinstance(input_ids, list):
                        labels.append(input_ids[:])
                    else:
                        labels.append(input_ids.tolist() if hasattr(input_ids, 'tolist') else list(input_ids))
                
                tokenized["labels"] = labels
                return tokenized
                
            except Exception as e:
                print(f"Error in tokenization: {e}")
                raise
        
        tokenized_train = train_dataset.map(
            safe_tokenize_function,
            batched=True,
            batch_size=4,
            remove_columns=train_dataset.column_names
        )
        
        tokenized_val = val_dataset.map(
            safe_tokenize_function,
            batched=True,
            batch_size=4,
            remove_columns=val_dataset.column_names
        )
        
        # Data collator
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=self.tokenizer,
            mlm=False,
            pad_to_multiple_of=8
        )
        
        fold_output_dir = os.path.join(output_dir, f"fold_{fold_idx}")
        
        # Training arguments với cải thiện overfitting
        training_args = TrainingArguments(
            output_dir=fold_output_dir,
            
            # Epochs và scheduling - giảm epochs
            num_train_epochs=num_epochs,
            learning_rate=2e-5,  # Giảm learning rate
            lr_scheduler_type="cosine_with_restarts",
            warmup_ratio=0.1,
            
            # Batch sizes
            per_device_train_batch_size=1,  # Giảm batch size
            per_device_eval_batch_size=2,
            gradient_accumulation_steps=16,  # Tăng để compensate
            
            # Regularization để giảm overfitting
            weight_decay=0.01,  # Thêm weight decay
            gradient_checkpointing=True,
            fp16=True,
            dataloader_drop_last=True,
            
            # Early stopping
            eval_strategy="steps",
            eval_steps=25,  # Eval thường xuyên hơn
            save_strategy="steps", 
            save_steps=50,
            logging_steps=5,
            
            # Best model và early stopping
            load_best_model_at_end=True,
            metric_for_best_model="eval_loss",
            greater_is_better=False,
            save_total_limit=1,  # Chỉ giữ best model
            
            # Ngăn overfitting
            max_grad_norm=0.5,  # Gradient clipping
            
            # Single GPU
            ddp_find_unused_parameters=False,
            local_rank=-1,
            
            # Reports
            report_to=[],  # Tắt wandb/tensorboard để đơn giản
            run_name=f"physics-chatbot-fold-{fold_idx}",
            
            # Reproducibility
            seed=42 + fold_idx,
            data_seed=42 + fold_idx,
            
            # Cleanup
            remove_unused_columns=False,
            dataloader_num_workers=0,
            dataloader_pin_memory=False,
        )
        
        # Early stopping callback
        early_stopping = EarlyStoppingCallback(
            early_stopping_patience=3,
            early_stopping_threshold=0.01
        )
        
        # Trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_val,
            data_collator=data_collator,
            tokenizer=self.tokenizer,
            compute_metrics=self.compute_physics_metrics,
            callbacks=[early_stopping]
        )
        
        # Training
        print(f"🔄 Bắt đầu training physics chatbot fold {fold_idx + 1}...")
        try:
            train_result = trainer.train()
            
            # Evaluate
            eval_result = trainer.evaluate()
            
            # Save model
            trainer.save_model()
            self.tokenizer.save_pretrained(fold_output_dir)
            
            # Lưu metrics
            fold_metrics = {
                'fold': fold_idx,
                'train_loss': train_result.training_loss,
                'eval_loss': eval_result['eval_loss'],
                'perplexity': eval_result.get('eval_perplexity', None),
                'train_samples': len(train_dataset),
                'val_samples': len(val_dataset),
                'output_dir': fold_output_dir,
                'epochs_trained': train_result.epoch
            }
            
            print(f"✅ Hoàn thành fold {fold_idx + 1}!")
            print(f"   - Train Loss: {fold_metrics['train_loss']:.4f}")
            print(f"   - Eval Loss: {fold_metrics['eval_loss']:.4f}")
            if fold_metrics['perplexity']:
                print(f"   - Perplexity: {fold_metrics['perplexity']:.2f}")
            print(f"   - Epochs: {fold_metrics['epochs_trained']:.1f}")
            
        except Exception as e:
            print(f"❌ Lỗi trong quá trình training fold {fold_idx + 1}: {e}")
            raise
        finally:
            # Clean up GPU memory
            del model
            del trainer
            torch.cuda.empty_cache()
        
        return fold_metrics
    
    def run_kfold_training(self, conversations: List[Dict], k: int = 5, 
                          output_dir: str = "./physics-chatbot-kfold", 
                          num_epochs: int = 2):
        """
        Chạy K-Fold Cross Validation training cho physics chatbot
        """
        self.setup_tokenizer()
        
        conversations_array = np.array(conversations, dtype=object)
        kfold = KFold(n_splits=k, shuffle=True, random_state=42)
        os.makedirs(output_dir, exist_ok=True)
        
        # Training each fold
        for fold_idx, (train_indices, val_indices) in enumerate(kfold.split(conversations_array)):
            train_conversations = conversations_array[train_indices].tolist()
            val_conversations = conversations_array[val_indices].tolist()
            
            train_dataset, val_dataset = self.prepare_fold_datasets(
                train_conversations, val_conversations
            )
            
            fold_metrics = self.train_fold(
                fold_idx=fold_idx,
                train_dataset=train_dataset,
                val_dataset=val_dataset,
                output_dir=output_dir,
                num_epochs=num_epochs
            )
            
            self.fold_results.append(fold_metrics)
        
        self.print_summary_results()
        self.save_kfold_results(output_dir)
        
        return self.fold_results
    
    def print_summary_results(self):
        """In kết quả tổng hợp với physics metrics"""
        print(f"\n{'='*60}")
        print("📊 KẾT QUẢ K-FOLD PHYSICS CHATBOT TRAINING")
        print(f"{'='*60}")
        
        avg_train_loss = np.mean([r['train_loss'] for r in self.fold_results])
        avg_eval_loss = np.mean([r['eval_loss'] for r in self.fold_results])
        std_train_loss = np.std([r['train_loss'] for r in self.fold_results])
        std_eval_loss = np.std([r['eval_loss'] for r in self.fold_results])
        
        # Tính overfitting gap
        overfitting_gap = avg_eval_loss - avg_train_loss
        
        print("\\nKết quả từng fold:")
        for result in self.fold_results:
            gap = result['eval_loss'] - result['train_loss']
            print(f"  Fold {result['fold'] + 1}: Train={result['train_loss']:.4f}, "
                  f"Eval={result['eval_loss']:.4f}, Gap={gap:.4f}, "
                  f"Epochs={result.get('epochs_trained', 'N/A')}")
        
        print(f"\\nTổng kết:")
        print(f"  - Average Train Loss: {avg_train_loss:.4f} (±{std_train_loss:.4f})")
        print(f"  - Average Eval Loss: {avg_eval_loss:.4f} (±{std_eval_loss:.4f})")
        print(f"  - Overfitting Gap: {overfitting_gap:.4f}")
        
        if overfitting_gap > 0.2:
            print(f"  ⚠️  Vẫn có overfitting - cần điều chỉnh thêm!")
        elif overfitting_gap > 0.1:
            print(f"  📊 Overfitting đã giảm đáng kể!")
        else:
            print(f"  ✅ Overfitting được kiểm soát tốt!")
        
        print(f"{'='*60}")
    
    def save_kfold_results(self, output_dir: str):
        """Lưu kết quả với physics-specific metrics"""
        results_file = os.path.join(output_dir, "physics_kfold_results.json")
        
        avg_train_loss = np.mean([r['train_loss'] for r in self.fold_results])
        avg_eval_loss = np.mean([r['eval_loss'] for r in self.fold_results])
        
        results_data = {
            'timestamp': datetime.now().isoformat(),
            'model_name': self.model_name,
            'task': 'physics_chatbot',
            'k_folds': len(self.fold_results),
            'fold_results': self.fold_results,
            'summary': {
                'avg_train_loss': avg_train_loss,
                'avg_eval_loss': avg_eval_loss,
                'std_train_loss': np.std([r['train_loss'] for r in self.fold_results]),
                'std_eval_loss': np.std([r['eval_loss'] for r in self.fold_results]),
                'overfitting_gap': avg_eval_loss - avg_train_loss,
                'avg_epochs_trained': np.mean([r.get('epochs_trained', 0) for r in self.fold_results])
            }
        }
        
        with open(results_file, 'w', encoding='utf-8') as f:
            json.dump(results_data, f, indent=2, ensure_ascii=False)
        
        print(f"\\n💾 Đã lưu kết quả Physics Chatbot tại: {results_file}")
    
    def test_physics_problems(self, test_conversations: List[Dict], output_dir: str):
        """Test model với các bài tập vật lý"""
        best_fold = min(self.fold_results, key=lambda x: x['eval_loss'])
        best_fold_idx = best_fold['fold']
        best_fold_dir = best_fold['output_dir']
        
        print(f"\\n🧪 Testing Physics Chatbot từ fold tốt nhất (Fold {best_fold_idx + 1})")
        print(f"   Eval Loss: {best_fold['eval_loss']:.4f}")
        
        # Load best model
        base_model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map={"": 0}
        )
        
        model = PeftModel.from_pretrained(base_model, best_fold_dir)
        tokenizer = AutoTokenizer.from_pretrained(best_fold_dir)
        
        print("\\n=== TESTING PHYSICS CHATBOT ===")
        
        for i, conv in enumerate(test_conversations[:3]):
            # Tạo prompt physics
            test_messages = conv[:-1]
            
            prompt = tokenizer.apply_chat_template(
                test_messages,
                tokenize=False,
                add_generation_prompt=True
            )
            
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=256,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                    pad_token_id=tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )
            
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            response = generated_text[len(prompt):].strip()
            
            print(f"\\n--- BÀI TẬP VẬT LÝ {i+1} ---")
            print(f"Câu hỏi: {conv[-2]['content']}")
            print(f"Đáp án mẫu: {conv[-1]['content'][:100]}...")
            print(f"Model trả lời: {response}")
            print("-" * 50)

# Hàm chính để sử dụng
def main_physics_chatbot_training():
    """Hàm chính để chạy K-Fold training cho Physics Chatbot"""
    
    # 1. Load dataset
    with open("/home/phamvanhung/SSD_512GB/working_folder/Project_Github/Building_a_physics_problem_solving_system/processing_dataset/dataset_physic/data_practice/dataset/dataset.json", "r", encoding="utf-8") as file:
        physics_conversations = json.load(file)
        print(f"Loaded {len(physics_conversations)} physics conversations")
    
    # 2. Khởi tạo trainer
    trainer = PhysicsChatbotTrainerKFold("Qwen/Qwen2.5-1.5B-Instruct")
    
    # 3. Run K-Fold training với early stopping
    fold_results = trainer.run_kfold_training(
        conversations=physics_conversations,
        k=5,
        output_dir="/home/phamvanhung/SSD_512GB/working_folder/Project_Github/Building_a_physics_problem_solving_system/weight",
        num_epochs=2  # Giảm epochs để tránh overfitting
    )
    
    # 4. Test với physics problems
    test_conversations = physics_conversations[:10]
    trainer.test_physics_problems(test_conversations, "/kaggle/working/physics-chatbot-kfold-improved")
    
    print("\\n🎉 Hoàn thành Physics Chatbot Training với overfitting control!")

if __name__ == "__main__":
    # Kiểm tra GPU
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name()}")
        print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    main_physics_chatbot_training()

2025-07-16 15:27:46.422398: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752679666.615386      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752679666.673773      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


CUDA available: True
GPU: Tesla T4
Memory: 15.8 GB
Loaded 1952 conversations
Đang tải tokenizer từ Qwen/Qwen2.5-1.5B-Instruct...


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

✅ Đã tải tokenizer thành công!
Đang chuẩn bị datasets - Train: 1561, Val: 391

🚀 TRAINING FOLD 1/5
Đang tải base model Qwen/Qwen2.5-1.5B-Instruct...


config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

✅ Đã tải model thành công trên cuda:0!
Thiết lập LoRA...
trainable params: 18,464,768 || all params: 1,562,179,072 || trainable%: 1.1820
Đang tokenize datasets...


Map:   0%|          | 0/1561 [00:00<?, ? examples/s]

Map:   0%|          | 0/391 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🔄 Bắt đầu training fold 1...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
50,0.3545,0.702721
100,0.3292,0.643389




✅ Hoàn thành fold 1!
   - Train Loss: 0.3850
   - Eval Loss: 0.6434
Đang chuẩn bị datasets - Train: 1561, Val: 391

🚀 TRAINING FOLD 2/5
Đang tải base model Qwen/Qwen2.5-1.5B-Instruct...
✅ Đã tải model thành công trên cuda:0!
Thiết lập LoRA...
trainable params: 18,464,768 || all params: 1,562,179,072 || trainable%: 1.1820
Đang tokenize datasets...


Map:   0%|          | 0/1561 [00:00<?, ? examples/s]

Map:   0%|          | 0/391 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🔄 Bắt đầu training fold 2...


Step,Training Loss,Validation Loss
50,0.3731,0.6843
100,0.3268,0.626415




✅ Hoàn thành fold 2!
   - Train Loss: 0.3880
   - Eval Loss: 0.6264
Đang chuẩn bị datasets - Train: 1562, Val: 390

🚀 TRAINING FOLD 3/5
Đang tải base model Qwen/Qwen2.5-1.5B-Instruct...
✅ Đã tải model thành công trên cuda:0!
Thiết lập LoRA...
trainable params: 18,464,768 || all params: 1,562,179,072 || trainable%: 1.1820
Đang tokenize datasets...


Map:   0%|          | 0/1562 [00:00<?, ? examples/s]

Map:   0%|          | 0/390 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🔄 Bắt đầu training fold 3...


Step,Training Loss,Validation Loss
50,0.3547,0.706174
100,0.3129,0.647625




✅ Hoàn thành fold 3!
   - Train Loss: 0.3874
   - Eval Loss: 0.6476
Đang chuẩn bị datasets - Train: 1562, Val: 390

🚀 TRAINING FOLD 4/5
Đang tải base model Qwen/Qwen2.5-1.5B-Instruct...
✅ Đã tải model thành công trên cuda:0!
Thiết lập LoRA...
trainable params: 18,464,768 || all params: 1,562,179,072 || trainable%: 1.1820
Đang tokenize datasets...


Map:   0%|          | 0/1562 [00:00<?, ? examples/s]

Map:   0%|          | 0/390 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🔄 Bắt đầu training fold 4...


Step,Training Loss,Validation Loss
50,0.3577,0.694183
100,0.3147,0.63684




✅ Hoàn thành fold 4!
   - Train Loss: 0.3870
   - Eval Loss: 0.6368
Đang chuẩn bị datasets - Train: 1562, Val: 390

🚀 TRAINING FOLD 5/5
Đang tải base model Qwen/Qwen2.5-1.5B-Instruct...
✅ Đã tải model thành công trên cuda:0!
Thiết lập LoRA...
trainable params: 18,464,768 || all params: 1,562,179,072 || trainable%: 1.1820
Đang tokenize datasets...


Map:   0%|          | 0/1562 [00:00<?, ? examples/s]

Map:   0%|          | 0/390 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🔄 Bắt đầu training fold 5...


Step,Training Loss,Validation Loss
50,0.3597,0.697586
100,0.323,0.63897




✅ Hoàn thành fold 5!
   - Train Loss: 0.3865
   - Eval Loss: 0.6390

📊 KẾT QUẢ K-FOLD CROSS VALIDATION

Kết quả từng fold:
  Fold 1: Train Loss = 0.3850, Eval Loss = 0.6434
  Fold 2: Train Loss = 0.3880, Eval Loss = 0.6264
  Fold 3: Train Loss = 0.3874, Eval Loss = 0.6476
  Fold 4: Train Loss = 0.3870, Eval Loss = 0.6368
  Fold 5: Train Loss = 0.3865, Eval Loss = 0.6390

Tổng kết:
  - Average Train Loss: 0.3868 (±0.0010)
  - Average Eval Loss: 0.6386 (±0.0072)

💾 Đã lưu kết quả K-Fold tại: /kaggle/working/qwen3-physics-kfold/kfold_results.json

🏆 Testing model từ fold tốt nhất (Fold 2)
   Eval Loss: 0.6264

=== TESTING BEST MODEL ===

--- TEST CASE 1 ---
User: Câu hỏi: Bạn A đi xe đạp từ nhà qua trạm xăng, tới siêu thị mua đồ rồi quay về nhà cất đồ, sau đó đi xe đến trường.Chọn hệ tọa độ có gốc là vị trí nhà bạn A, trục Ox trùng với đường đi từ nhà bạn A tới trường.a) Tính quãng đường đi được và độ dịch chuyển của bạn A khi đi từ trạm xăng tới siêu thị.b) Tính quãng đường đi được và độ