In [1]:
!pip install -U transformers trl peft datasets evaluate rouge_score underthesea bitsandbytes thefuzz sentence-transformers numpy dotenv
# Tải tài nguyên NLTK cho METEOR
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')



[nltk_data] Downloading package wordnet to
[nltk_data]     /home/thanhnguyenvq2403/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /home/thanhnguyenvq2403/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
import os
os.environ["WANDB_DISABLED"] = "true"
import json
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
import string
from underthesea import word_tokenize
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, EarlyStoppingCallback
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import evaluate

  from .autonotebook import tqdm as notebook_tqdm
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from dotenv import load_dotenv
import os
import torch
from peft import LoraConfig, get_peft_model

# Tải biến môi trường từ file .env
load_dotenv()

# Đọc access token từ biến môi trường
hf_token = os.getenv("HF_DAMA2")
if not hf_token:
    raise ValueError("Không tìm thấy HF_DAMA2 trong file .env. Vui lòng thêm token vào file .env với định dạng: HF_DAMA2=your_token")

def load_model_and_tokenizer(quantization="int8"):  # Thêm tham số quantization
    model_name = "vietgpt/dama-2-7b-chat"

    # Cấu hình quantization với bitsandbytes
    if quantization == "int8":
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True,  # Quantize thành INT8
            bnb_8bit_compute_dtype=torch.bfloat16,  # Dùng bfloat16 để tính toán
            bnb_8bit_use_double_quant=True,  # Double quantization để tăng hiệu quả
        )
    elif quantization == "int4":
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,  # Quantize thành INT4
            bnb_4bit_compute_dtype=torch.bfloat16,  # Dùng bfloat16 để tính toán
            bnb_4bit_use_double_quant=True,  # Double quantization
            bnb_4bit_quant_type="nf4",  # Dùng NF4 (Normalized Float 4-bit) để tối ưu
        )
    else:
        quantization_config = None  # Không quantize

    # Tải tokenizer và mô hình với token xác thực
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        token=hf_token  # Sử dụng token từ biến môi trường
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        token=hf_token  # Sử dụng token từ biến môi trường
    )
    print(model)
    # Cấu hình LoRA
    peft_config = LoraConfig(
        r=32,
        lora_alpha=32,
        lora_dropout=0.5,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    )
    model = get_peft_model(model, peft_config)
    return model, tokenizer, peft_config

# Tải mô hình với INT8 quantization
model, tokenizer, peft_config = load_model_and_tokenizer(quantization="int8")

Loading checkpoint shards: 100%|██████████| 2/2 [00:10<00:00,  5.28s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(50261, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear8bitLt(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear8bitLt(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear8bitLt(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
 

In [5]:
# Định nghĩa formatting_func với apply_chat_template
def formatting_func(example):
    if not all(k in example for k in ['input', 'output']):
        print('Thiếu key trong example:', example)
        return ''
    messages = [
        {"role": "user", "content": example['input']},
        {"role": "assistant", "content": example['output']}
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False)

In [6]:
import os
import json
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
import string
from underthesea import word_tokenize
from thefuzz import fuzz

def preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans("", "", string.punctuation))
    tokens = word_tokenize(text)
    text = " ".join(tokens)
    return text

def extract_json_from_folder(folder_path):
    dataset = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".json"):
            file_path = os.path.join(folder_path, filename)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    json_data = json.load(f)
                    for item in json_data:
                        if all(k in item for k in ['input', 'output']):
                            item['input'] = preprocess_text(item['input'])
                            item['output'] = preprocess_text(item['output'])
                            dataset.append(item)
                        else:
                            print(f"Thiếu trường trong {filename}: {item}")
            except json.JSONDecodeError:
                print(f"Không thể parse JSON từ {filename}")
            except Exception as e:
                print(f"Lỗi khi đọc {filename}: {e}")
    return dataset

folder_path = "/home/thanhnguyenvq2403/model/KLTN/data_finetune"
dataset = extract_json_from_folder(folder_path)

df = pd.DataFrame(dataset)
print("Số lượng giá trị duy nhất trong 'input' (exact):", df['input'].nunique())
print("Số lượng giá trị duy nhất trong 'output' (exact):", df['output'].nunique())
print("Tổng số hàng:", len(df))

# Fuzzy matching để tìm các record tương tự
similarity_threshold = 90  # Ngưỡng độ tương đồng (90%)
input_pairs = []
output_pairs = []

# Tìm các cặp input và output tương tự
for i in range(len(df)):
    for j in range(i + 1, len(df)):
        input_sim = fuzz.ratio(df['input'].iloc[i], df['input'].iloc[j])
        if input_sim >= similarity_threshold:
            input_pairs.append((i, j, input_sim))
        output_sim = fuzz.ratio(df['output'].iloc[i], df['output'].iloc[j])
        if output_sim >= similarity_threshold:
            output_pairs.append((i, j, output_sim))

print(f"Số cặp input tương tự (>{similarity_threshold}%):", len(input_pairs))
print(f"Số cặp output tương tự (>{similarity_threshold}%):", len(output_pairs))

# Loại bỏ các record có input hoặc output tương tự (giữ record đầu tiên)
indices_to_keep = set(range(len(df)))
for i, j, _ in input_pairs:
    if j in indices_to_keep:
        indices_to_keep.remove(j)
for i, j, _ in output_pairs:
    if j in indices_to_keep:
        indices_to_keep.remove(j)

df = df.iloc[list(indices_to_keep)].reset_index(drop=True)
print("Số hàng sau khi xóa record tương tự:", len(df))

# Kiểm tra lại độ unique
print("Số lượng giá trị duy nhất trong 'input' (sau xử lý):", df['input'].nunique())
print("Số lượng giá trị duy nhất trong 'output' (sau xử lý):", df['output'].nunique())

# Chia train/validation
full_dataset = Dataset.from_pandas(df[['input', 'output']])
train_df, eval_df = train_test_split(df, test_size=0.2, random_state=42)
train_dataset = Dataset.from_pandas(train_df[['input', 'output']])
eval_dataset = Dataset.from_pandas(eval_df[['input', 'output']])
print(train_df[['input', 'output']].head())

Số lượng giá trị duy nhất trong 'input' (exact): 1599
Số lượng giá trị duy nhất trong 'output' (exact): 1659
Tổng số hàng: 1700
Số cặp input tương tự (>90%): 320
Số cặp output tương tự (>90%): 41
Số hàng sau khi xóa record tương tự: 1471
Số lượng giá trị duy nhất trong 'input' (sau xử lý): 1471
Số lượng giá trị duy nhất trong 'output' (sau xử lý): 1471
                                                  input  \
998       trẻ tự kỷ có cần học trường chuyên biệt không   
254                 trẻ nói chuyện không đúng hoàn cảnh   
1073                      tự kỷ có bị coi là bệnh không   
643   trẻ không biết hoàn thành công việc đúng thời hạn   
1450                     trẻ không chỉ vật để gây chú ý   

                                                 output  
998   không phải trẻ tự kỷ nào cũng cần học trường c...  
254   trẻ nói nội dung không phù hợp tình huống có t...  
1073  tự kỷ không phải là bệnh mà là một rối loạn ph...  
643   hay quên hoặc trễ deadline là thiếu tổ chức – ...  


In [7]:
import evaluate
import numpy as np
from sentence_transformers import SentenceTransformer, util
def evaluate_metrics(predictions, references):
    # Tải các độ đo
    rouge = evaluate.load("rouge")
    meteor = evaluate.load("meteor")

    # Tính các độ đo
    rouge_results = rouge.compute(predictions=predictions, references=references)
    meteor_results = meteor.compute(predictions=predictions, references=references)

    # Tải mô hình nhúng câu để tính Cosine Similarity
    embedder = SentenceTransformer('BAAI/bge-m3')

    # Tính embeddings cho dự đoán và tham chiếu
    pred_embeddings = embedder.encode(predictions, convert_to_tensor=True)
    ref_embeddings = embedder.encode(references, convert_to_tensor=True)

    # Tính Cosine Similarity giữa từng cặp dự đoán-tham chiếu
    cosine_scores = util.cos_sim(pred_embeddings, ref_embeddings)
    
    # Xử lý trường hợp predictions rỗng để tránh lỗi
    if len(predictions) > 0 and cosine_scores.ndim == 2 and cosine_scores.shape[0] == len(predictions):
        avg_cosine_similarity = np.mean([cosine_scores[i][i].item() for i in range(len(predictions))])
    elif len(predictions) == 0:
        avg_cosine_similarity = 0.0 # Hoặc np.nan
        print("Cảnh báo: Danh sách predictions rỗng, cosine_similarity được đặt là 0.0")
    else:
        # Trường hợp này ít xảy ra nếu predictions và references cùng độ dài và không rỗng
        print(f"Cảnh báo: Kích thước cosine_scores ({cosine_scores.shape}) không khớp với len(predictions) ({len(predictions)}).")
        avg_cosine_similarity = np.nan # Hoặc một giá trị mặc định
    # Gộp kết quả
    metrics = {
        "rouge1": rouge_results["rouge1"],
        "rouge2": rouge_results["rouge2"],
        "rougeL": rouge_results["rougeL"],
        "meteor": meteor_results["meteor"],
        "cosine_similarity": avg_cosine_similarity,
    }
    return metrics

def generate_predictions(model, tokenizer, inputs, max_length=200):
    """Tạo dự đoán từ mô hình cho các đầu vào."""
    model.eval()
    predictions = []
    # Đảm bảo pad_token được thiết lập
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    for input_text in inputs:
        # Tiền xử lý input để đồng bộ với huấn luyện
        input_text = preprocess_text(input_text)
        prompt = f"<s>[INST] {input_text} [/INST]"
        inputs_encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs_encoded,
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id
            )
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        pred = generated_text.split("[/INST]")[-1].strip()
        # Tiền xử lý dự đoán để đồng bộ với tham chiếu
        pred = preprocess_text(pred)
        predictions.append(pred)
    return predictions

In [8]:
import json
import time
import gc
from transformers import TrainingArguments
from trl import SFTTrainer
from transformers import EarlyStoppingCallback

# Loại bỏ cột không cần thiết để tránh cảnh báo
train_dataset = train_dataset.remove_columns(['__index_level_0__'] if '__index_level_0__' in train_dataset.column_names else [])
eval_dataset = eval_dataset.remove_columns(['__index_level_0__'] if '__index_level_0__' in eval_dataset.column_names else [])

model_name = "vietgpt/dama-2-7b-chat"

# Đo thời gian chạy đơn
print("\n=== Bắt đầu chạy đơn ===")
start_time = time.time()

# Cấu hình huấn luyện cho Single Run
training_arguments_single = TrainingArguments(
    output_dir=f"./results_single_{model_name.split('/')[-1]}",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=10,
    learning_rate=5e-5,
    weight_decay=0.1,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.1,
    group_by_length=True,
    lr_scheduler_type="cosine",
    eval_strategy="steps",
    eval_steps=10,
    logging_strategy="steps",
    log_level="info",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# Huấn luyện Single Run
trainer_single = SFTTrainer(
    model=model,
    args=training_arguments_single,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    formatting_func=formatting_func,
    callbacks=[
        EarlyStoppingCallback(
            early_stopping_patience=3,
            early_stopping_threshold=0.01,
        )
    ],
)
print(f"Train dataset size: {len(train_dataset)}, Eval dataset size: {len(eval_dataset)}")
trainer_single.train()

# Lưu mô hình Single Run
model.save_pretrained(f"./finetuned_{model_name.split('/')[-1]}_single")
tokenizer.save_pretrained(f"./finetuned_{model_name.split('/')[-1]}_single")

# Đánh giá Single Run
test_inputs = eval_df['input'].tolist()
test_references = eval_df['output'].tolist()
predictions_single = generate_predictions(model, tokenizer, test_inputs)
metrics_single = evaluate_metrics(predictions_single, test_references)
print(f"Single Run Metrics for {model_name}:", metrics_single)

# Lưu metrics vào file
metrics_file = f"single_run_metrics_{model_name.split('/')[-1]}.json"
try:
    with open(metrics_file, "w", encoding="utf-8") as f:
        json.dump(metrics_single, f, indent=4, ensure_ascii=False)
    print(f"Đã lưu metrics chạy đơn vào: {metrics_file}")
except Exception as e:
    print(f"Lỗi khi lưu metrics chạy đơn: {e}")

# In thời gian chạy
end_time = time.time()
duration = end_time - start_time
print(f"Thời gian chạy đơn: {duration:.2f} giây")

# Kiểm tra mẫu dự đoán
for i in range(5):
    print(f"Input: {test_inputs[i]}")
    print(f"Prediction: {predictions_single[i]}")
    print(f"Reference: {test_references[i]}\n")
del model, trainer_single
torch.cuda.empty_cache()
gc.collect()

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



=== Bắt đầu chạy đơn ===


Applying formatting function to train dataset: 100%|██████████| 1176/1176 [00:00<00:00, 18966.20 examples/s]
Converting train dataset to ChatML: 100%|██████████| 1176/1176 [00:00<00:00, 60245.03 examples/s]
Adding EOS to train dataset: 100%|██████████| 1176/1176 [00:00<00:00, 46164.16 examples/s]
Tokenizing train dataset: 100%|██████████| 1176/1176 [00:00<00:00, 8017.75 examples/s]
Truncating train dataset: 100%|██████████| 1176/1176 [00:00<00:00, 385864.16 examples/s]
Applying formatting function to eval dataset: 100%|██████████| 295/295 [00:00<00:00, 19021.06 examples/s]
Converting eval dataset to ChatML: 100%|██████████| 295/295 [00:00<00:00, 46566.55 examples/s]
Adding EOS to eval dataset: 100%|██████████| 295/295 [00:00<00:00, 30539.04 examples/s]
Tokenizing eval dataset: 100%|██████████| 295/295 [00:00<00:00, 8020.01 examples/s]
Truncating eval dataset: 100%|██████████| 295/295 [00:00<00:00, 126528.24 examples/s]
Using auto half precision backend
No label_names provided for model

Train dataset size: 1176, Eval dataset size: 295




Step,Training Loss,Validation Loss
10,4.3059,4.520417
20,4.0195,3.47767
30,2.9685,2.642222
40,2.4011,2.075317
50,1.9984,1.871779
60,1.8272,1.799386
70,1.775,1.740034
80,1.6601,1.697594
90,1.6531,1.659984
100,1.5989,1.643116


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 295
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 295
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2

Single Run Metrics for vietgpt/dama-2-7b-chat: {'rouge1': np.float64(0.607874256002578), 'rouge2': np.float64(0.28743063573810734), 'rougeL': np.float64(0.39841565175339244), 'meteor': np.float64(0.32690395564713454), 'cosine_similarity': np.float64(0.7793062499013997)}
Đã lưu metrics chạy đơn vào: single_run_metrics_dama-2-7b-chat.json
Thời gian chạy đơn: 2021.01 giây
Input: trẻ không biết liên hệ với người lớn khi cần hỗ trợ hoạt động
Prediction: trẻ không biết liên hệ với người lớn khi cần hỗ trợ hoạt động không biết nhờ người lớn giúp khi gặp khó khăn là thiếu kỹ năng an toàn – nên luyện mẫu câu con cần giúp con
Reference: tự làm hết mà không hiệu quả – nên luyện viết thư gọi điện hoặc nhờ giáo viên liên hệ hỗ trợ đúng người

Input: trẻ không có giao tiếp mắt
Prediction: trẻ không có giao tiếp mắt trẻ không nhìn vào mắt người khác khi nói chuyện là dấu hiệu thiếu tương tác xã hội
Reference: giao tiếp bằng mắt là một dấu hiệu quan trọng của sự phát triển xã hội nếu trẻ ít hoặc không

48873

In [9]:
from sklearn.model_selection import KFold
import numpy as np
import gc
import time
import torch

# Cấu hình KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_metrics = []
fold_models = []
fold_times = []
total_start_time = time.time()

# Lặp qua từng fold
for fold, (train_idx, eval_idx) in enumerate(kf.split(df)):
    print(f"\nTraining Fold {fold + 1}...")
    fold_start_time = time.time()

    # Tạo tập train và eval cho fold hiện tại
    train_fold = df.iloc[train_idx][['input', 'output']]
    eval_fold = df.iloc[eval_idx][['input', 'output']]
    train_fold_dataset = Dataset.from_pandas(train_fold)
    eval_fold_dataset = Dataset.from_pandas(eval_fold)

    # Loại bỏ cột không cần thiết
    train_fold_dataset = train_fold_dataset.remove_columns(['__index_level_0__'] if '__index_level_0__' in train_fold_dataset.column_names else [])
    eval_fold_dataset = eval_fold_dataset.remove_columns(['__index_level_0__'] if '__index_level_0__' in eval_fold_dataset.column_names else [])

    # Tải lại mô hình gốc với INT4 quantization
    model, tokenizer, peft_config = load_model_and_tokenizer(quantization="int4")
    print(f"Fold {fold + 1} - Train size: {len(train_fold_dataset)}, Eval size: {len(eval_fold_dataset)}")

    # Cấu hình huấn luyện cho fold
    training_arguments_fold = TrainingArguments(
        output_dir=f"./results_{model_name.split('/')[-1]}fold_{fold + 1}",
        num_train_epochs=3,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,
        optim="paged_adamw_32bit",
        save_steps=100,
        logging_steps=10,
        learning_rate=5e-5,
        weight_decay=0.1,
        fp16=False,
        bf16=True,
        max_grad_norm=0.3,
        warmup_ratio=0.1,
        group_by_length=True,
        lr_scheduler_type="cosine",
        eval_strategy="steps",
        eval_steps=10,
        logging_strategy="steps",
        log_level="info",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
    )

    # Huấn luyện fold
    trainer_fold = SFTTrainer(
        model=model,
        args=training_arguments_fold,
        train_dataset=train_fold_dataset,
        eval_dataset=eval_fold_dataset,
        peft_config=peft_config,
        formatting_func=formatting_func,
        callbacks=[
            EarlyStoppingCallback(
                early_stopping_patience=3,
                early_stopping_threshold=0.01,
            )
        ],
    )
    trainer_fold.train()

    # Lưu mô hình fold
    fold_path = f"./finetuned_dama_2_fold_{fold + 1}"
#pragma warning disable format
    model.save_pretrained(fold_path)
    tokenizer.save_pretrained(fold_path)
    fold_models.append(fold_path)

    # Đánh giá fold
    test_inputs_fold = eval_fold['input'].tolist()
    test_references_fold = eval_fold['output'].tolist()
    predictions_fold = generate_predictions(model, tokenizer, test_inputs_fold)
    metrics_fold = evaluate_metrics(predictions_fold, test_references_fold)
    print(f"Fold {fold + 1} Metrics:", metrics_fold)
    fold_metrics.append(metrics_fold)

    # Lưu metrics của fold
    with open(f"fold_{fold + 1}_metrics_dama_2.json", "w") as f:
        json.dump(metrics_fold, f, indent=4)

    # Đo thời gian chạy của fold
    fold_end_time = time.time()
    fold_duration = fold_end_time - fold_start_time
    fold_times.append(fold_duration)
    print(f"Thời gian chạy Fold {fold + 1}: {fold_duration:.2f} giây")

    # Dọn dẹp bộ nhớ
    del model, trainer_fold
    torch.cuda.empty_cache()
    gc.collect()

# Tính tổng thời gian và thời gian trung bình
total_end_time = time.time()
total_duration = total_end_time - total_start_time
print(f"\n=== Kết thúc huấn luyện K-Fold ===")
print(f"Tổng thời gian chạy: {total_duration:.2f} giây")
print(f"Thời gian trung bình mỗi fold: {np.mean(fold_times):.2f} giây")

# Tính trung bình metrics qua các fold
avg_metrics = {
    "rouge1": np.mean([m["rouge1"] for m in fold_metrics]),
    "rouge2": np.mean([m["rouge2"] for m in fold_metrics]),
    "rougeL": np.mean([m["rougeL"] for m in fold_metrics]),
    "meteor": np.mean([m["meteor"] for m in fold_metrics]),
    "cosine_similarity": np.mean([m["cosine_similarity"] for m in fold_metrics]),
}
print("\nAverage Cross-Validation Metrics:", avg_metrics)

# Lưu metrics trung bình
with open("cross_validation_metrics_dama_2.json", "w") as f:
    json.dump(avg_metrics, f, indent=4)


Training Fold 1...


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(50261, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Lla

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
PyTorch: setting up devices


Fold 1 - Train size: 1176, Eval size: 295


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

Step,Training Loss,Validation Loss
10,3.6176,3.766991
20,3.5695,3.152803
30,2.6337,2.340977
40,2.1846,2.010775
50,1.9963,1.863515
60,1.8276,1.800313
70,1.7783,1.734739
80,1.6985,1.71445
90,1.6316,1.67429
100,1.585,1.652105


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 295
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 295
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2

Fold 1 Metrics: {'rouge1': np.float64(0.6080373890335842), 'rouge2': np.float64(0.2839179126232185), 'rougeL': np.float64(0.3963914164861611), 'meteor': np.float64(0.3378864657966599), 'cosine_similarity': np.float64(0.7792880975593954)}
Thời gian chạy Fold 1: 1273.40 giây

Training Fold 2...


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(50261, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Lla

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
PyTorch: setting up devices


Fold 2 - Train size: 1177, Eval size: 294


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

Step,Training Loss,Validation Loss
10,3.5169,3.75351
20,3.5852,3.134556
30,2.7034,2.325836
40,2.2711,2.019816
50,1.9522,1.864777
60,1.8838,1.801669
70,1.793,1.737699
80,1.8518,1.719018
90,1.5936,1.682201
100,1.6908,1.658917


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2

Fold 2 Metrics: {'rouge1': np.float64(0.6033686932342311), 'rouge2': np.float64(0.27590676501955114), 'rougeL': np.float64(0.3911027358696688), 'meteor': np.float64(0.33413824059076264), 'cosine_similarity': np.float64(0.7775095081856461)}
Thời gian chạy Fold 2: 1275.50 giây

Training Fold 3...


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(50261, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Lla

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
PyTorch: setting up devices


Fold 3 - Train size: 1177, Eval size: 294


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

Step,Training Loss,Validation Loss
10,3.5401,3.771364
20,3.6183,3.165943
30,2.7156,2.412363
40,2.2273,2.089988
50,1.9341,1.948927
60,1.8188,1.880835
70,1.7287,1.82284
80,1.86,1.776943
90,1.614,1.761228
100,1.5899,1.728144


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2

Fold 3 Metrics: {'rouge1': np.float64(0.6030378441413574), 'rouge2': np.float64(0.27300814763028436), 'rougeL': np.float64(0.3871245907915382), 'meteor': np.float64(0.32423325003701853), 'cosine_similarity': np.float64(0.7698658208052317)}
Thời gian chạy Fold 3: 1325.63 giây

Training Fold 4...


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(50261, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Lla

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
PyTorch: setting up devices


Fold 4 - Train size: 1177, Eval size: 294


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

Step,Training Loss,Validation Loss
10,3.5215,3.740305
20,3.6018,3.156959
30,2.7587,2.368032
40,2.2232,2.086433
50,1.9915,1.938474
60,1.8759,1.861773
70,1.7852,1.793044
80,1.8943,1.758853
90,1.6693,1.713314
100,1.6501,1.684365


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2

Fold 4 Metrics: {'rouge1': np.float64(0.6177479234413816), 'rouge2': np.float64(0.2908350733004069), 'rougeL': np.float64(0.40483708056663426), 'meteor': np.float64(0.34675785574108675), 'cosine_similarity': np.float64(0.7809572793594023)}
Thời gian chạy Fold 4: 1293.38 giây

Training Fold 5...


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(50261, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Lla

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
PyTorch: setting up devices


Fold 5 - Train size: 1177, Eval size: 294


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

Step,Training Loss,Validation Loss
10,3.5457,3.755609
20,3.5674,3.15375
30,2.6878,2.386022
40,2.1997,2.138365
50,2.003,1.96179
60,1.8175,1.884859
70,1.8181,1.807978
80,1.9138,1.776138
90,1.6317,1.736375
100,1.6543,1.710684


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 294
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: input, output, text. If input, output, text are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2

Fold 5 Metrics: {'rouge1': np.float64(0.6100504248892045), 'rouge2': np.float64(0.27938905426389193), 'rougeL': np.float64(0.3881090308649977), 'meteor': np.float64(0.33715025729412346), 'cosine_similarity': np.float64(0.7706833246613847)}
Thời gian chạy Fold 5: 1325.84 giây

=== Kết thúc huấn luyện K-Fold ===
Tổng thời gian chạy: 6495.39 giây
Thời gian trung bình mỗi fold: 1298.75 giây

Average Cross-Validation Metrics: {'rouge1': np.float64(0.6084484549479517), 'rouge2': np.float64(0.2806113905674706), 'rougeL': np.float64(0.39351297091579995), 'meteor': np.float64(0.33603321389193025), 'cosine_similarity': np.float64(0.7756608061142121)}


In [11]:
import json
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gc
from dotenv import load_dotenv
# Tải biến môi trường từ file .env
load_dotenv()

# Đọc access token từ biến môi trường
hf_token = os.getenv("HF_DAMA2")
print(hf_token)

# --- Phần 1: Tìm fold có metric tốt nhất ---

def load_metrics_from_file(file_path):
    """Đọc metrics từ file JSON."""
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Cảnh báo: Không tìm thấy file metric: {file_path}")
        return None
    except json.JSONDecodeError:
        print(f"Cảnh báo: File metric không phải JSON hợp lệ: {file_path}")
        return None

def find_best_fold_by_metric(metrics_base_path, num_folds, metric_name_to_optimize, higher_is_better=True):
    """
    Tìm fold có giá trị metric được chỉ định cao nhất (hoặc thấp nhất).

    Args:
        metrics_base_path (str): Đường dẫn cơ sở đến thư mục chứa các file metrics.
                                 Ví dụ: "./" nếu các file ở thư mục hiện tại.
        num_folds (int): Tổng số lượng folds.
        metric_name_to_optimize (str): Tên của metric dùng để so sánh (ví dụ: 'rougeL', 'cosine_similarity').
        higher_is_better (bool): True nếu giá trị metric cao hơn là tốt hơn, False nếu thấp hơn là tốt hơn.

    Returns:
        tuple: (best_fold_number, best_metric_value, all_metrics_of_best_fold) hoặc (None, None, None) nếu lỗi.
    """
    best_fold_so_far = None
    best_metric_val = -float('inf') if higher_is_better else float('inf')
    all_metrics_best_fold = None

    print(f"Đang tìm fold tốt nhất dựa trên metric: '{metric_name_to_optimize}' (cao hơn là tốt hơn: {higher_is_better})")

    for i in range(1, num_folds + 1):
        # Giả sử tên file là "fold_X_metrics" hoặc "fold_X_metrics.json"
        # Sửa lại mẫu tên file nếu cần
        metric_file_candidate_1 = os.path.join(metrics_base_path, f"fold_{i}_metrics_dama_2.json")
        metric_file_candidate_2 = os.path.join(metrics_base_path, f"fold_{i}_metrics_dama_2") # Không có đuôi .json

        metrics_data = None
        if os.path.exists(metric_file_candidate_1):
            metrics_data = load_metrics_from_file(metric_file_candidate_1)
        elif os.path.exists(metric_file_candidate_2):
            metrics_data = load_metrics_from_file(metric_file_candidate_2)
        else:
            print(f"Không tìm thấy file metric cho fold {i} tại: {metric_file_candidate_1} hoặc {metric_file_candidate_2}")
            continue

        if metrics_data is None:
            continue # Bỏ qua nếu không đọc được file

        if metric_name_to_optimize not in metrics_data:
            print(f"Cảnh báo: Metric '{metric_name_to_optimize}' không có trong file của fold {i}. Bỏ qua fold này để so sánh.")
            continue

        current_metric_val = metrics_data[metric_name_to_optimize]
        print(f"Fold {i}: '{metric_name_to_optimize}' = {current_metric_val:.4f}")

        if higher_is_better:
            if current_metric_val > best_metric_val:
                best_metric_val = current_metric_val
                best_fold_so_far = i
                all_metrics_best_fold = metrics_data
        else: # lower is better
            if current_metric_val < best_metric_val:
                best_metric_val = current_metric_val
                best_fold_so_far = i
                all_metrics_best_fold = metrics_data

    if best_fold_so_far is not None:
        print(f"\n=> Fold tốt nhất được chọn: Fold {best_fold_so_far} với {metric_name_to_optimize} = {best_metric_val:.4f}")
        # print(f"Toàn bộ metrics của fold {best_fold_so_far}: {all_metrics_best_fold}")
        return best_fold_so_far, best_metric_val, all_metrics_best_fold
    else:
        print(f"\n=> Không thể xác định fold tốt nhất dựa trên metric '{metric_name_to_optimize}'.")
        return None, None, None

# --- Cấu hình cho việc tìm fold ---
# Đặt đường dẫn đến thư mục chứa các file fold_X_metrics của bạn
# Ví dụ: nếu các file fold_1_metrics, fold_2_metrics,... nằm cùng thư mục với script này:
METRICS_FILES_DIRECTORY = "./" 
NUM_TOTAL_FOLDS = 5
# Chọn metric bạn muốn sử dụng để quyết định fold nào tốt nhất
# Ví dụ: 'rougeL', 'cosine_similarity', 'meteor', 'rouge1', 'rouge2'
METRIC_TO_OPTIMIZE_FOR = "cosine_similarity" # THAY ĐỔI TÊN METRIC NÀY NẾU CẦN

best_fold_id, _, _ = find_best_fold_by_metric(
    METRICS_FILES_DIRECTORY,
    NUM_TOTAL_FOLDS,
    METRIC_TO_OPTIMIZE_FOR,
    higher_is_better=True # Hầu hết các metric này, cao hơn là tốt hơn
)

if best_fold_id is None:
    print("Không thể xác định fold tốt nhất. Sẽ sử dụng một fold mặc định hoặc dừng chương trình.")
    # Gán một fold mặc định nếu muốn tiếp tục
    default_fold_if_not_found = 4 # Ví dụ, bạn có thể muốn mặc định là fold 4
    print(f"Sử dụng fold mặc định: {default_fold_if_not_found}")
    best_fold_id = default_fold_if_not_found
    # Hoặc bạn có thể dừng chương trình:
    # exit("Dừng chương trình do không tìm được fold tốt nhất.")

# --- Phần 2: Merge model sử dụng adapter từ fold tốt nhất ---
# Sửa đổi để merge vào base model ở định dạng full/half precision

print(f"\n--- Bắt đầu quá trình merge model cho Fold {best_fold_id} ---")

try:
    # Giải phóng bộ nhớ trước khi tải model lớn
    torch.cuda.empty_cache()
    gc.collect()
    print("Đã giải phóng bộ nhớ GPU (nếu có).")

    # Cấu hình tải mô hình gốc
    base_model_name = "vietgpt/dama-2-7b-chat" # Thay bằng model base của bạn nếu khác
    # Dựa trên lỗi trước đó, model Vinallama có tên khác (Viet-Mistral/Vinallama-7B-Chat?)
    # Bạn cần chắc chắn base_model_name ở đây là model gốc bạn đã dùng để fine-tune
    # Ví dụ: base_model_name = "Viet-Mistral/Vinallama-7B-Chat" # <-- KHẢ NĂNG CAO BẠN CẦN THAY ĐỔI Ở ĐÂY

    print(f"Đang tải tokenizer cho model: {base_model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_name,
        token=hf_token  # Sử dụng token từ biến môi trường
    )
    print("Tokenizer đã được tải.")

    # Tải base model ở định dạng Bfloat16 (hoặc Float16) - KHÔNG DÙNG BitsAndBytes
    print(f"Đang tải base model '{base_model_name}' ở định dạng BF16 (không lượng tử hóa BitsAndBytes)...")
    # Bỏ hoàn toàn quantization_config khi tải base model
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        # quantization_config=quantization_config, # <-- BỎ DÒNG NÀY!
        torch_dtype=torch.bfloat16, # Tải ở bfloat16 (kích thước lớn)
        device_map="cuda", # Vẫn dùng auto device_map để phân bổ lên GPU nếu có đủ VRAM
        token=hf_token
    )
    print("Base model đã được tải thành công ở định dạng BF16.")

    # Đường dẫn tới adapter LoRA của fold tốt nhất
    # Cần điều chỉnh đường dẫn này cho phù hợp với cấu trúc thư mục của bạn
    # Đảm bảo bạn sử dụng tên thư mục fine-tuned adapter đúng với fold tốt nhất tìm được ở Phần 1
    fine_tuned_adapters_base_dir = "/home/thanhnguyenvq2403/model/KLTN/" # Giả định thư mục chứa fine-tuned models
    adapter_path_for_best_fold = os.path.join(fine_tuned_adapters_base_dir, f"finetuned_dama_2_fold_{best_fold_id}") # <--- SỬA TÊN THƯ MỤC ADAPTER NẾU CẦN (ví dụ: vinallama thay vì seaLLM)

    print(f"Đang tải adapter LoRA từ: {adapter_path_for_best_fold}")
    if not os.path.isdir(adapter_path_for_best_fold):
        raise FileNotFoundError(f"Lỗi: Thư mục adapter LoRA không tồn tại: {adapter_path_for_best_fold}")

    lora_model = PeftModel.from_pretrained(base_model, adapter_path_for_best_fold, is_trainable=False)
    print("Adapter LoRA đã được tải.")

    # Hợp nhất adapter vào base model (ở định dạng BF16)
    # Kết quả merged_model sẽ là mô hình dense ở định dạng BF16
    print("Đang hợp nhất adapter LoRA vào base model (BF16)...")
    merged_model = lora_model.merge_and_unload()
    print("Hợp nhất adapter thành công. Mô hình đã hợp nhất ở định dạng BF16.")

    # Lưu mô hình đã hợp nhất
    # Đặt tên cho thư mục lưu model đã merge.
    # Tên này nên phản ánh rằng nó đã merge và ở định dạng không lượng tử hóa BitsAndBytes
    output_merged_model_dir = f"/home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_{best_fold_id}_bf16" # Đổi tên cho rõ định dạng
    print(f"Đang lưu mô hình đã hợp nhất vào: {output_merged_model_dir}")

    # Đảm bảo thư mục output tồn tại
    os.makedirs(output_merged_model_dir, exist_ok=True)

    merged_model.save_pretrained(output_merged_model_dir, safe_serialization=True) # Nên dùng safe_serialization
    tokenizer.save_pretrained(output_merged_model_dir) # Lưu cả tokenizer

    print(f"Mô hình đã hợp nhất và tokenizer đã được lưu vào: {output_merged_model_dir}")

    # Dọn dẹp bộ nhớ
    del base_model
    del lora_model
    del merged_model
    torch.cuda.empty_cache()
    gc.collect()
    print("Đã dọn dẹp bộ nhớ.")


except FileNotFoundError as e:
     print(f"LỖI FILE: {e}")
except Exception as e:
    print(f"ĐÃ CÓ LỖI XẢY RA TRONG QUÁ TRÌNH MERGE MODEL: {e}")
    import traceback
    traceback.print_exc()

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
Đang tìm fold tốt nhất dựa trên metric: 'cosine_similarity' (cao hơn là tốt hơn: True)
Fold 1: 'cosine_similarity' = 0.7793
Fold 2: 'cosine_similarity' = 0.7775
Fold 3: 'cosine_similarity' = 0.7699
Fold 4: 'cosine_similarity' = 0.7810
Fold 5: 'cosine_similarity' = 0.7707

=> Fold tốt nhất được chọn: Fold 4 với cosine_similarity = 0.7810

--- Bắt đầu quá trình merge model cho Fold 4 ---
Đã giải phóng bộ nhớ GPU (nếu có).
Đang tải tokenizer cho model: vietgpt/dama-2-7b-chat...


loading file vocab.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/vocab.json
loading file merges.txt from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/merges.txt
loading file tokenizer.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/tokenizer.json
loading file added_tokens.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/added_tokens.json
loading file special_tokens_map.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/th

Tokenizer đã được tải.
Đang tải base model 'vietgpt/dama-2-7b-chat' ở định dạng BF16 (không lượng tử hóa BitsAndBytes)...


loading configuration file config.json from cache at /home/thanhnguyenvq2403/.cache/huggingface/hub/models--vietgpt--dama-2-7b-chat/snapshots/efacdb053b58e26fe1de9e6b6faf0224367f78a1/config.json
Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.3",
  "use_cache": true,
  "vocab_size": 50261
}

loading weights file pytorch_model.bin from cache at /home/thanhnguyenvq2403/.ca

Base model đã được tải thành công ở định dạng BF16.
Đang tải adapter LoRA từ: /home/thanhnguyenvq2403/model/KLTN/finetuned_dama_2_fold_4


Configuration saved in /home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_4_bf16/config.json
Configuration saved in /home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_4_bf16/generation_config.json


Adapter LoRA đã được tải.
Đang hợp nhất adapter LoRA vào base model (BF16)...
Hợp nhất adapter thành công. Mô hình đã hợp nhất ở định dạng BF16.
Đang lưu mô hình đã hợp nhất vào: /home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_4_bf16


The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 3 checkpoint shards. You can find where each parameters has been saved in the index located at /home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_4_bf16/model.safetensors.index.json.
tokenizer config file saved in /home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_4_bf16/tokenizer_config.json
Special tokens file saved in /home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_4_bf16/special_tokens_map.json


Mô hình đã hợp nhất và tokenizer đã được lưu vào: /home/thanhnguyenvq2403/model/KLTN/merged_dama_2_fold_4_bf16
Đã dọn dẹp bộ nhớ.
