In [6]:
# @title 1. Setup Environment & Load SST-2
!pip install -q transformers peft datasets evaluate scikit-learn accelerate psutil

import os
import time
import psutil
import torch
import pandas as pd
import numpy as np
from google.colab import drive
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# 1. Mount Drive
drive.mount('/content/drive')
SAVE_PATH = '/content/drive/My Drive/SLM_Research/SST2_Falcon1B_LoRA'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load SST-2
print("--- Loading SST-2 ---")
dataset = load_dataset("glue", "sst2")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using device: cuda
--- Loading SST-2 ---


In [7]:
# @title 2. Tokenization
MODEL_NAME = 'tiiuae/falcon-rw-1b'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Thi·∫øt l·∫≠p PAD Token b·∫±ng EOS Token (B·∫Øt bu·ªôc cho Falcon)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", truncation=True, max_length=64)

print("--- Tokenizing Dataset (Falcon) ---")
tokenized_datasets = dataset.map(tokenize_function, batched=True)

tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")

dataset_train = tokenized_datasets["train"]
dataset_val = tokenized_datasets["validation"]

--- Tokenizing Dataset (Falcon) ---


Map:   0%|          | 0/872 [00:00<?, ? examples/s]

In [9]:
# @title 3. Falcon-1B + LoRA Configuration (Fixed FP16 Error)

# 1. Load Base Model (FP16)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
    torch_dtype=torch.float16 # Load model g·ªëc nh·∫π h∆°n
)
model.config.pad_token_id = tokenizer.pad_token_id
model.to(device)

# 2. ƒê·ªãnh nghƒ©a LoRA Config
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value", "dense"],
    lora_dropout=0.1,
    bias="none"
)

# 3. Ch√®n LoRA
model = get_peft_model(model, lora_config)

# --- KH·∫ÆC PH·ª§C L·ªñI VALUE ERROR ---
# √âp to√†n b·ªô c√°c tham s·ªë c·∫ßn hu·∫•n luy·ªán (LoRA adapters) v·ªÅ float32
# ƒêi·ªÅu n√†y gi√∫p Scaler t√≠nh to√°n gradient ch√≠nh x√°c, tr√°nh l·ªói unscale FP16
print("--- Casting trainable parameters to Float32 for stability ---")
for name, param in model.named_parameters():
    if param.requires_grad:
        param.data = param.data.to(torch.float32)

model.print_trainable_parameters()

Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-rw-1b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


--- Casting trainable parameters to Float32 for stability ---
trainable params: 2,363,392 || all params: 1,313,992,704 || trainable%: 0.1799


In [11]:
# @title 4. Smart Training (Falcon LoRA) - Fix NameError
import os
import time
import numpy as np
import torch
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback
from transformers.trainer_utils import get_last_checkpoint
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# 1. ƒê·ªãnh nghƒ©a Metrics (ƒê·ªãnh nghƒ©a l·∫°i t·∫°i ƒë√¢y ƒë·ªÉ tr√°nh l·ªói NameError)
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # Falcon c√≥ th·ªÉ tr·∫£ v·ªÅ tuple, c·∫ßn l·∫•y ph·∫ßn t·ª≠ ƒë·∫ßu ti√™n
    if isinstance(logits, tuple):
        logits = logits[0]

    predictions = np.argmax(logits, axis=-1)
    probs = softmax(logits, axis=1)[:, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    roc_auc = roc_auc_score(labels, probs)

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'roc_auc': roc_auc
    }

# 2. Training Arguments (T·ªëi ∆∞u VRAM cho Falcon)
training_args = TrainingArguments(
    output_dir=SAVE_PATH,
    num_train_epochs=3,

    # --- C·∫•u h√¨nh Ti·∫øt ki·ªám VRAM ---
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # -------------------------------

    # --- C·∫•u h√¨nh Checkpoint theo Epoch ---
    save_strategy="epoch",

    save_total_limit=1,

    eval_strategy="epoch",


    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=50,

    load_best_model_at_end=True,
    metric_for_best_model="accuracy",

    fp16=True,
    report_to="none"
)

# 3. Kh·ªüi t·∫°o Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,  # ƒê√£ ƒë∆∞·ª£c ƒë·ªãnh nghƒ©a ·ªü tr√™n
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# 4. T·ª± ƒë·ªông Resume (Ch·∫°y ti·∫øp n·∫øu c√≥ checkpoint c≈©)
# T·∫Øt cache ƒë·ªÉ tr√°nh xung ƒë·ªôt v·ªõi gradient checkpointing
model.config.use_cache = False

print(f"--- Ki·ªÉm tra checkpoint t·∫°i: {SAVE_PATH} ---")
last_checkpoint = get_last_checkpoint(SAVE_PATH)

start_train_time = time.time()

if last_checkpoint:
    print(f"üîÑ T√¨m th·∫•y checkpoint: {last_checkpoint}")
    print("üöÄ ƒêang kh√¥i ph·ª•c v√† ch·∫°y ti·∫øp...")
    trainer.train(resume_from_checkpoint=last_checkpoint)
else:
    print("‚ú® Kh√¥ng th·∫•y checkpoint c≈©. B·∫Øt ƒë·∫ßu train m·ªõi...")
    trainer.train()

training_time = time.time() - start_train_time

# 5. L∆∞u model cu·ªëi c√πng
trainer.save_model(SAVE_PATH)
print("‚úÖ Hu·∫•n luy·ªán ho√†n t·∫•t v√† ƒë√£ l∆∞u!")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 50256, 'bos_token_id': 50256}.


--- Ki·ªÉm tra checkpoint t·∫°i: /content/drive/My Drive/SLM_Research/SST2_Falcon1B_LoRA ---
üîÑ T√¨m th·∫•y checkpoint: /content/drive/My Drive/SLM_Research/SST2_Falcon1B_LoRA/checkpoint-4000
üöÄ ƒêang kh√¥i ph·ª•c v√† ch·∫°y ti·∫øp...


	logging_steps: 50 (from args) != 100 (from trainer_state.json)
	save_steps: 500 (from args) != 1000 (from trainer_state.json)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.1615,0.170092,0.951835,0.952381,0.958904,0.945946,0.983629
2,0.1492,0.172371,0.947248,0.948315,0.946188,0.95045,0.984153
3,0.1635,0.170795,0.950688,0.951192,0.95881,0.943694,0.984111




‚úÖ Hu·∫•n luy·ªán ho√†n t·∫•t v√† ƒë√£ l∆∞u!


In [12]:
# @title 5. Final Evaluation (Full Metrics & Resources)
import os
import time
import psutil
import torch
import pandas as pd
import numpy as np

print("--- Running Final Evaluation on Validation Set ---")

# 1. Prediction & Latency
start_pred_time = time.time()
predictions_output = trainer.predict(dataset_val)
end_pred_time = time.time()

# L·∫•y metrics
metrics = predictions_output.metrics
total_samples = len(dataset_val)
latency = ((end_pred_time - start_pred_time) / total_samples) * 1000 # ms/sample

# 2. Model Size Check (Adapter Only)
adapter_file = os.path.join(SAVE_PATH, 'adapter_model.safetensors')
if not os.path.exists(adapter_file):
    adapter_file = os.path.join(SAVE_PATH, 'adapter_model.bin')

adapter_size = 0
if os.path.exists(adapter_file):
    adapter_size = os.path.getsize(adapter_file) / (1024 * 1024)

# 3. Resource Usage
process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / (1024 ** 2)
# L·∫•y Peak VRAM
vram_usage = torch.cuda.max_memory_allocated() / (1024 ** 2) if torch.cuda.is_available() else 0

# 4. Final Detailed Report
print("\n====== REPORT: Falcon-1B + LoRA on SST-2 ======")
print(f"1. Classification Metrics:")
print(f"   - Accuracy:  {metrics.get('test_accuracy', 0):.4f}")
print(f"   - Precision: {metrics.get('test_precision', 0):.4f}")
print(f"   - Recall:    {metrics.get('test_recall', 0):.4f}")
print(f"   - F1-Score:  {metrics.get('test_f1', 0):.4f}")
print(f"   - ROC-AUC:   {metrics.get('test_roc_auc', 0):.4f}")

print(f"\n2. Efficiency Metrics:")
print(f"   - Training Time:      {training_time:.2f} s")
print(f"   - Inference Latency:  {latency:.4f} ms/sample")
print(f"   - Adapter Size (Disk): {adapter_size:.4f} MB")
print(f"   - Peak RAM Usage:     {ram_usage:.2f} MB")
print(f"   - Peak VRAM Usage:    {vram_usage:.2f} MB")

# 5. Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC",
               "Training Time (s)", "Inference Latency (ms)", "Adapter Size (MB)",
               "Peak RAM (MB)", "Peak VRAM (MB)"],
    "Value": [
        metrics.get('test_accuracy', 0),
        metrics.get('test_precision', 0),
        metrics.get('test_recall', 0),
        metrics.get('test_f1', 0),
        metrics.get('test_roc_auc', 0),
        training_time,
        latency,
        adapter_size,
        ram_usage,
        vram_usage
    ]
})

results_file = os.path.join(SAVE_PATH, 'sst2_falcon1b_lora_results.csv')
results_df.to_csv(results_file, index=False)
print(f"\nReport saved to {results_file}")

--- Running Final Evaluation on Validation Set ---



1. Classification Metrics:
   - Accuracy:  0.9518
   - Precision: 0.9589
   - Recall:    0.9459
   - F1-Score:  0.9524
   - ROC-AUC:   0.9836

2. Efficiency Metrics:
   - Training Time:      1816.08 s
   - Inference Latency:  9.7882 ms/sample
   - Adapter Size (Disk): 9.0287 MB
   - Peak RAM Usage:     3713.37 MB
   - Peak VRAM Usage:    5064.85 MB

Report saved to /content/drive/My Drive/SLM_Research/SST2_Falcon1B_LoRA/sst2_falcon1b_lora_results.csv
