In [1]:
# @title PHẦN 1: Setup Môi trường & Prompt Tuning Config
!pip install -q transformers peft datasets evaluate scikit-learn accelerate psutil

import os
import torch
import psutil
import numpy as np
from google.colab import drive
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit

# 1. Mount Drive
drive.mount('/content/drive')
SAVE_PATH = '/content/drive/My Drive/SLM_Research/SST2_Falcon1B_PromptTuning'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load Data & Tokenizer
print("--- Loading SST-2 ---")
dataset = load_dataset("glue", "sst2")
MODEL_NAME = 'tiiuae/falcon-rw-1b'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token # Bắt buộc cho Falcon
tokenizer.padding_side = "right"

def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", truncation=True, max_length=64)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")
dataset_train = tokenized_datasets["train"]
dataset_val = tokenized_datasets["validation"]

# 3. Load Model & Setup Prompt Tuning
print("--- Loading Falcon-1B (FP16) ---")
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
    torch_dtype=torch.float16 # FP16 để tiết kiệm VRAM
)
model.config.pad_token_id = tokenizer.pad_token_id
model.gradient_checkpointing_enable() # Quan trọng để tránh OOM

# --- FIX LỖI VALUE ERROR (Khai báo rõ kích thước Falcon) ---
# Falcon-1B: hidden_size=2048, num_hidden_layers=24
print(f"Detected Config: Layers={model.config.num_hidden_layers}, Dim={model.config.hidden_size}")

peft_config = PromptTuningConfig(
    task_type=TaskType.SEQ_CLS,
    num_virtual_tokens=10,
    prompt_tuning_init=PromptTuningInit.RANDOM,

    # Khai báo tường minh để PEFT hiểu cấu trúc Falcon
    num_layers=model.config.num_hidden_layers,
    token_dim=model.config.hidden_size,
    num_attention_heads=model.config.num_attention_heads,
    num_transformer_submodules=1
)

model = get_peft_model(model, peft_config)
model.to(device)

print("\n--- Prompt Tuning Efficiency (Falcon-1B) ---")
model.print_trainable_parameters()

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Using device: cuda
--- Loading SST-2 ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

sst2/train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

sst2/validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

sst2/test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

--- Loading Falcon-1B (FP16) ---


config.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


pytorch_model.bin:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-rw-1b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Detected Config: Layers=24, Dim=2048

--- Prompt Tuning Efficiency (Falcon-1B) ---
trainable params: 24,576 || all params: 1,311,653,888 || trainable%: 0.0019


In [4]:
# @title PHẦN 2: Smart Training (Falcon Prompt Tuning - Fixed FP16 Error)
import time
import torch
import numpy as np
from transformers.trainer_utils import get_last_checkpoint
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# --- FIX LỖI QUAN TRỌNG: Ép kiểu tham số Prompt về Float32 ---
# Bước này giúp tránh lỗi "unscale FP16 gradients"
print("--- Casting Prompt Embeddings to Float32 ---")
for param in model.parameters():
    if param.requires_grad:
        param.data = param.data.to(torch.float32)
# -------------------------------------------------------------

# 1. Metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    if isinstance(logits, tuple): logits = logits[0]

    predictions = np.argmax(logits, axis=-1)
    probs = softmax(logits, axis=1)[:, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    roc_auc = roc_auc_score(labels, probs)
    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall, 'roc_auc': roc_auc}

# 2. Training Args (Giữ nguyên cấu hình tối ưu)
training_args = TrainingArguments(
    output_dir=SAVE_PATH,
    num_train_epochs=3,
    per_device_train_batch_size=4,    # Batch nhỏ
    gradient_accumulation_steps=4,    # Tích lũy để đủ batch 16
    gradient_checkpointing=True,      # Tiết kiệm VRAM

    # Checkpoint & Save
    save_strategy="epoch",
    save_total_limit=1,

    eval_strategy="epoch",


    learning_rate=5e-5,
    warmup_steps=500,
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    fp16=True,
    report_to="none"
)

# 3. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# 4. Auto Resume & Train
model.config.use_cache = False # Tắt cache để tránh warning với gradient checkpointing

print(f"--- Checking Drive: {SAVE_PATH} ---")
last_checkpoint = get_last_checkpoint(SAVE_PATH)

if last_checkpoint:
    print(f"🔄 Found checkpoint: {last_checkpoint}")
    print("🚀 Resuming training...")
    trainer.train(resume_from_checkpoint=last_checkpoint)
else:
    print("✨ Starting new training...")
    trainer.train()

# 5. Save Final
trainer.save_model(SAVE_PATH)
print("Training Completed!")

--- Casting Prompt Embeddings to Float32 ---
--- Checking Drive: /content/drive/My Drive/SLM_Research/SST2_Falcon1B_PromptTuning ---
🔄 Found checkpoint: /content/drive/My Drive/SLM_Research/SST2_Falcon1B_PromptTuning/checkpoint-4000
🚀 Resuming training...


	save_steps: 500 (from args) != 1000 (from trainer_state.json)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.6016,0.662137,0.569954,0.291115,0.905882,0.173423,0.78641


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.6016,0.662137,0.569954,0.291115,0.905882,0.173423,0.78641
2,0.5446,0.548754,0.774083,0.756489,0.838356,0.689189,0.846713
3,0.5238,0.53015,0.772936,0.780973,0.767391,0.795045,0.855077


Training Completed!


In [3]:
# Kiểm tra xem máy đang hiểu Batch Size là bao nhiêu
print(f"1. Số lượng dữ liệu Train: {len(dataset_train)} dòng")
print(f"2. Batch Size bạn cài đặt: {training_args.per_device_train_batch_size}")
print(f"3. Batch Size thực tế trong Trainer cũ: {trainer.args.per_device_train_batch_size}")

if trainer.args.per_device_train_batch_size == 4:
    print(">>> KẾT LUẬN: Trainer VẪN ĐANG DÙNG CẤU HÌNH CŨ (Batch=4) NÊN MỚI RA 50K STEPS!")
else:
    print(">>> Cấu hình đã đúng.")

1. Số lượng dữ liệu Train: 67349 dòng
2. Batch Size bạn cài đặt: 16
3. Batch Size thực tế trong Trainer cũ: 16
>>> Cấu hình đã đúng.


In [5]:
# @title PHẦN 3: Final Report (Full Metrics & Resources)
import os
import time
import psutil
import torch
import pandas as pd
import numpy as np

print("--- Running Final Evaluation ---")

# 1. Prediction
start_pred = time.time()
predictions_output = trainer.predict(dataset_val)
end_pred = time.time()

metrics = predictions_output.metrics
latency = ((end_pred - start_pred) / len(dataset_val)) * 1000

# 2. Size Check (Prompt Embedding Only)
# Prompt tuning chỉ lưu các vector embedding mới, kích thước rất nhỏ (KB)
adapter_bin = os.path.join(SAVE_PATH, 'adapter_model.bin')
adapter_safe = os.path.join(SAVE_PATH, 'adapter_model.safetensors')

model_size = 0
if os.path.exists(adapter_bin): model_size = os.path.getsize(adapter_bin)
if os.path.exists(adapter_safe): model_size = os.path.getsize(adapter_safe)

model_size_mb = model_size / (1024 * 1024)

# 3. Resources
process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / (1024 ** 2)
vram_usage = torch.cuda.max_memory_allocated() / (1024 ** 2) if torch.cuda.is_available() else 0
current_training_time = training_time if 'training_time' in locals() else 0.0

# 4. Report
print("\n====== REPORT: Falcon-1B + Prompt Tuning ======")
print(f"1. Classification Metrics:")
print(f"   - Accuracy:  {metrics.get('test_accuracy', 0):.4f}")
print(f"   - Precision: {metrics.get('test_precision', 0):.4f}")
print(f"   - Recall:    {metrics.get('test_recall', 0):.4f}")
print(f"   - F1-Score:  {metrics.get('test_f1', 0):.4f}")
print(f"   - ROC-AUC:   {metrics.get('test_roc_auc', 0):.4f}")

print(f"\n2. Efficiency Metrics:")
print(f"   - Training Time:      {current_training_time:.2f} s")
print(f"   - Inference Latency:  {latency:.4f} ms/sample")
print(f"   - Adapter (Prompt) Size: {model_size_mb:.4f} MB")
print(f"   - Peak RAM Usage:     {ram_usage:.2f} MB")
print(f"   - Peak VRAM Usage:    {vram_usage:.2f} MB")

# 5. Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC",
               "Training Time (s)", "Inference Latency (ms)", "Adapter Size (MB)",
               "Peak RAM (MB)", "Peak VRAM (MB)"],
    "Value": [
        metrics.get('test_accuracy', 0),
        metrics.get('test_precision', 0),
        metrics.get('test_recall', 0),
        metrics.get('test_f1', 0),
        metrics.get('test_roc_auc', 0),
        current_training_time,
        latency,
        model_size_mb,
        ram_usage,
        vram_usage
    ]
})
results_file = os.path.join(SAVE_PATH, 'sst2_falcon1b_prompt_full_report.csv')
results_df.to_csv(results_file, index=False)
print(f"\nReport saved to: {results_file}")

--- Running Final Evaluation ---



1. Classification Metrics:
   - Accuracy:  0.7741
   - Precision: 0.8384
   - Recall:    0.6892
   - F1-Score:  0.7565
   - ROC-AUC:   0.8467

2. Efficiency Metrics:
   - Training Time:      0.00 s
   - Inference Latency:  10.4091 ms/sample
   - Adapter (Prompt) Size: 0.0939 MB
   - Peak RAM Usage:     3510.18 MB
   - Peak VRAM Usage:    2588.94 MB

Report saved to: /content/drive/My Drive/SLM_Research/SST2_Falcon1B_PromptTuning/sst2_falcon1b_prompt_full_report.csv
