In [1]:
# @title Phần 1: Setup Environment & Load SST-2
# !pip install -q transformers datasets evaluate scikit-learn accelerate psutil

import os
import time
import psutil
import torch
import pandas as pd
import numpy as np
import gc
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# 1. Setup Path (Nơi lưu kết quả)
SAVE_PATH = './Falcon_1B_SST2_Results'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

# 2. Check Device & GPU (RTX A5000)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device} - Falcon-1B_FTT_SST-2.ipynb:30")
if device.type == 'cuda':
    print(f"GPU Name: {torch.cuda.get_device_name(0)} - Falcon-1B_FTT_SST-2.ipynb:32")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB - Falcon-1B_FTT_SST-2.ipynb:33")

# 3. Load SST-2 Dataset
print("Loading SST2 (Stanford Sentiment Treebank) - Falcon-1B_FTT_SST-2.ipynb:36")
dataset = load_dataset("glue", "sst2")
print(">>> Dataset Loaded Successfully! - Falcon-1B_FTT_SST-2.ipynb:38")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda - Falcon-1B_FTT_SST-2.ipynb:30
GPU Name: NVIDIA RTX A5000 - Falcon-1B_FTT_SST-2.ipynb:32
VRAM: 25.76 GB - Falcon-1B_FTT_SST-2.ipynb:33
Loading SST2 (Stanford Sentiment Treebank) - Falcon-1B_FTT_SST-2.ipynb:36
>>> Dataset Loaded Successfully! - Falcon-1B_FTT_SST-2.ipynb:38


In [2]:
# @title Phần 2: Tokenization (Falcon-1B)
MODEL_NAME = 'tiiuae/falcon-rw-1b'

print(f"Loading Tokenizer: {MODEL_NAME}... - Falcon-1B_FTT_SST-2.ipynb:4")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# LƯU Ý QUAN TRỌNG: Falcon không có pad_token mặc định -> Gán bằng eos_token
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    # SST-2 sử dụng cột 'sentence'
    # Giữ max_length=512 để đồng nhất benchmark với IMDB
    return tokenizer(
        examples["sentence"],
        padding="max_length",
        truncation=True,
        max_length=64
    )

print("Tokenizing Dataset... - Falcon-1B_FTT_SST-2.ipynb:20")
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Chuẩn hóa tên cột cho PyTorch
tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")

# Chia tập train/val
dataset_train = tokenized_datasets["train"]
dataset_val = tokenized_datasets["validation"]

print(f">>> Tokenization Complete. Train size: {len(dataset_train)}, Val size: {len(dataset_val)} - Falcon-1B_FTT_SST-2.ipynb:32")

Loading Tokenizer: tiiuae/falcon-rw-1b... - Falcon-1B_FTT_SST-2.ipynb:4
Tokenizing Dataset... - Falcon-1B_FTT_SST-2.ipynb:20


Map: 100%|██████████| 872/872 [00:00<00:00, 7464.91 examples/s]

>>> Tokenization Complete. Train size: 67349, Val size: 872 - Falcon-1B_FTT_SST-2.ipynb:32





In [3]:
# @title Phần 3: Load Model (BF16 Optimized)

# Dọn dẹp VRAM trước khi load
torch.cuda.empty_cache()
gc.collect()

print(f"Loading Model: {MODEL_NAME} in BFloat16... - Falcon-1B_FTT_SST-2.ipynb:7")
try:
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,
        # A5000 hỗ trợ Bfloat16 -> Chạy nhanh & ổn định hơn FP16
        torch_dtype=torch.bfloat16,
        use_safetensors=True
    )
    print(">>> Loaded successfully using SafeTensors (BF16). - Falcon-1B_FTT_SST-2.ipynb:16")
except Exception as e:
    print(f">>> SafeTensors failed ({e}). Loading legacy mode... - Falcon-1B_FTT_SST-2.ipynb:18")
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,
        torch_dtype=torch.bfloat16,
        use_safetensors=False
    )

model.to(device)
model.config.pad_token_id = tokenizer.pad_token_id

# Bật Gradient Checkpointing để tiết kiệm VRAM nếu cần
model.gradient_checkpointing_enable()
print(">>> Model Ready on GPU! - Falcon-1B_FTT_SST-2.ipynb:31")

Loading Model: tiiuae/falcon-rw-1b in BFloat16... - Falcon-1B_FTT_SST-2.ipynb:7


`torch_dtype` is deprecated! Use `dtype` instead!
Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-rw-1b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


>>> Loaded successfully using SafeTensors (BF16). - Falcon-1B_FTT_SST-2.ipynb:16
>>> Model Ready on GPU! - Falcon-1B_FTT_SST-2.ipynb:31


In [4]:
# @title Phần 4: Training Config & Execution

# 1. Định nghĩa Metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    if isinstance(logits, tuple):
        logits = logits[0]

    probs = softmax(logits, axis=1)[:, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)

    try:
        roc_auc = roc_auc_score(labels, probs)
    except:
        roc_auc = 0.0

    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall, 'roc_auc': roc_auc}

# 2. Training Arguments 
training_args = TrainingArguments(
    output_dir='./results_falcon_sst2',
    num_train_epochs=3,

    # --- CẤU HÌNH TỐI ƯU VRAM 24GB ---
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,    
    per_device_eval_batch_size=32,

    learning_rate=2e-5,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='./logs_falcon',
    logging_steps=50,

    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=2,

    # --- QUAN TRỌNG CHO A5000 ---
    fp16=False,     # Tắt FP16 để tránh lỗi unscale
    bf16=True,      # Bật BF16 cho dòng Ampere
    
    report_to="none",
    optim="adamw_torch",
    dataloader_num_workers=0 # Windows fix
)

# 3. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# 4. Start Training
print("Starting Training (Falcon1B on SST2 | Turbo Mode BF16) - Falcon-1B_FTT_SST-2.ipynb:65")
start_train_time = time.time()
trainer.train()
end_train_time = time.time()
training_time = end_train_time - start_train_time

print(f"\n>>> Training Finished in {training_time:.2f} seconds. - Falcon-1B_FTT_SST-2.ipynb:71")

# 5. Save Model
print(f"Saving model to {SAVE_PATH}... - Falcon-1B_FTT_SST-2.ipynb:74")
trainer.save_model(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Starting Training (Falcon1B on SST2 | Turbo Mode BF16) - Falcon-1B_FTT_SST-2.ipynb:65


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.1262,0.131821,0.958716,0.959821,0.951327,0.968468,0.989428
2,0.0507,0.160593,0.958716,0.959551,0.957399,0.961712,0.988962
3,0.048,0.183409,0.959862,0.96063,0.959551,0.961712,0.988731



>>> Training Finished in 5390.01 seconds. - Falcon-1B_FTT_SST-2.ipynb:71
Saving model to ./Falcon_1B_SST2_Results... - Falcon-1B_FTT_SST-2.ipynb:74


('./Falcon_1B_SST2_Results\\tokenizer_config.json',
 './Falcon_1B_SST2_Results\\special_tokens_map.json',
 './Falcon_1B_SST2_Results\\vocab.json',
 './Falcon_1B_SST2_Results\\merges.txt',
 './Falcon_1B_SST2_Results\\added_tokens.json',
 './Falcon_1B_SST2_Results\\tokenizer.json')

In [7]:
# @title 5. Final Report (Full Metrics & Resources) - Falcon Compatible
import time
import os
import torch
import psutil
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

print("Running Final Evaluation (SST2 Test Set) - Falcon-1B_FTT_SST-2.ipynb:11")

# 1. Prediction
start_pred = time.time()
predictions_output = trainer.predict(dataset_val)
end_pred = time.time()

# --- TÍNH TOÁN METRICS THỦ CÔNG (Để đảm bảo không bị số 0) ---
raw_logits = predictions_output.predictions
if isinstance(raw_logits, tuple): raw_logits = raw_logits[0]
true_labels = predictions_output.label_ids

# Chuyển đổi logits -> nhãn và xác suất
pred_labels = np.argmax(raw_logits, axis=-1)
pred_probs = softmax(raw_logits, axis=1)[:, 1]

# Tính toán chuẩn xác bằng Sklearn
acc = accuracy_score(true_labels, pred_labels)
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='binary')
roc_auc = roc_auc_score(true_labels, pred_probs)

# Gán ngược vào dictionary 'metrics' để code in bên dưới hoạt động đúng
metrics = {
    'test_accuracy': acc,
    'test_precision': precision,
    'test_recall': recall,
    'test_f1': f1,
    'test_roc_auc': roc_auc
}

total_samples = len(dataset_val)
latency = ((end_pred - start_pred) / total_samples) * 1000 # ms/sample

# 2. Save & Calculate Size (Sửa cho Falcon/PEFT)
# Falcon Prompt Tuning dùng trainer.save_model hoặc model.save_pretrained
trainer.save_model(SAVE_PATH) 

# Kiểm tra các file có thể được sinh ra bởi Falcon/PEFT
possible_files = ['adapter_model.bin', 'adapter_model.safetensors', 'pytorch_model.bin', 'model.safetensors']
model_size_mb = 0

for file_name in possible_files:
    file_path = os.path.join(SAVE_PATH, file_name)
    if os.path.exists(file_path):
        size = os.path.getsize(file_path) / (1024**2)
        model_size_mb += size

# 3. Resource Usage Monitoring
process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / (1024 ** 2)
vram_usage = torch.cuda.max_memory_allocated() / (1024 ** 2) if torch.cuda.is_available() else 0

# Xử lý biến training_time (nếu chưa có thì gán = 0)
try:
    training_time = training_time
except NameError:
    training_time = 0.0

# 4. Final Detailed Report (Y hệt mẫu bạn gửi)
print("\n====== REPORT: Falcon1B + Prompt Tuning (SST2) ====== - Falcon-1B_FTT_SST-2.ipynb:70")
print(f"1. Classification Metrics: - Falcon-1B_FTT_SST-2.ipynb:71")
print(f"Accuracy:  {metrics.get('test_accuracy', 0):.4f} - Falcon-1B_FTT_SST-2.ipynb:72")
print(f"Precision: {metrics.get('test_precision', 0):.4f} - Falcon-1B_FTT_SST-2.ipynb:73")
print(f"Recall:    {metrics.get('test_recall', 0):.4f} - Falcon-1B_FTT_SST-2.ipynb:74")
print(f"F1Score:  {metrics.get('test_f1', 0):.4f} - Falcon-1B_FTT_SST-2.ipynb:75")
print(f"ROCAUC:   {metrics.get('test_roc_auc', 0):.4f} - Falcon-1B_FTT_SST-2.ipynb:76")

print(f"\n2. Efficiency Metrics: - Falcon-1B_FTT_SST-2.ipynb:78")
print(f"Training Time:      {training_time:.2f} s - Falcon-1B_FTT_SST-2.ipynb:79")
print(f"Inference Latency:  {latency:.4f} ms/sample - Falcon-1B_FTT_SST-2.ipynb:80")
print(f"Adapter Size (Disk): {model_size_mb:.2f} MB - Falcon-1B_FTT_SST-2.ipynb:81")
print(f"Peak RAM Usage:     {ram_usage:.2f} MB - Falcon-1B_FTT_SST-2.ipynb:82")
print(f"Peak VRAM Usage:    {vram_usage:.2f} MB - Falcon-1B_FTT_SST-2.ipynb:83")

# 5. Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC",
               "Training Time (s)", "Inference Latency (ms)", "Adapter Size (MB)",
               "Peak RAM (MB)", "Peak VRAM (MB)"],
    "Value": [
        metrics.get('test_accuracy', 0),
        metrics.get('test_precision', 0),
        metrics.get('test_recall', 0),
        metrics.get('test_f1', 0),
        metrics.get('test_roc_auc', 0),
        training_time,
        latency,
        model_size_mb,
        ram_usage,
        vram_usage
    ]
})

results_file = os.path.join(SAVE_PATH, 'falcon_sst2_adapters_report.csv')
results_df.to_csv(results_file, index=False)
print(f"\nReport saved to Drive: {results_file} - Falcon-1B_FTT_SST-2.ipynb:106")

Running Final Evaluation (SST2 Test Set) - Falcon-1B_FTT_SST-2.ipynb:11

1. Classification Metrics: - Falcon-1B_FTT_SST-2.ipynb:71
Accuracy:  0.9587 - Falcon-1B_FTT_SST-2.ipynb:72
Precision: 0.9595 - Falcon-1B_FTT_SST-2.ipynb:73
Recall:    0.9595 - Falcon-1B_FTT_SST-2.ipynb:74
F1Score:  0.9595 - Falcon-1B_FTT_SST-2.ipynb:75
ROCAUC:   0.9887 - Falcon-1B_FTT_SST-2.ipynb:76

2. Efficiency Metrics: - Falcon-1B_FTT_SST-2.ipynb:78
Training Time:      5390.01 s - Falcon-1B_FTT_SST-2.ipynb:79
Inference Latency:  10.5780 ms/sample - Falcon-1B_FTT_SST-2.ipynb:80
Adapter Size (Disk): 2501.77 MB - Falcon-1B_FTT_SST-2.ipynb:81
Peak RAM Usage:     1773.18 MB - Falcon-1B_FTT_SST-2.ipynb:82
Peak VRAM Usage:    12523.73 MB - Falcon-1B_FTT_SST-2.ipynb:83

Report saved to Drive: ./Falcon_1B_SST2_Results\falcon_sst2_adapters_report.csv - Falcon-1B_FTT_SST-2.ipynb:106
