In [1]:
# @title 1. Setup Environment & Load SST-2
!pip install -q transformers peft datasets evaluate scikit-learn accelerate psutil

import os
import time
import psutil
import torch
import pandas as pd
import numpy as np
from google.colab import drive
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# 1. Mount Drive
drive.mount('/content/drive')
SAVE_PATH = '/content/drive/My Drive/SLM_Research/SST2_TinyBERT_LoRA'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load SST-2 Dataset
print("--- Loading SST-2 ---")
dataset = load_dataset("glue", "sst2")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Using device: cuda
--- Loading SST-2 ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

sst2/train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

sst2/validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

sst2/test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [2]:
# @title 2. Tokenization (TinyBERT - 512)
MODEL_NAME = 'huawei-noah/TinyBERT_General_4L_312D'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", truncation=True, max_length=512)

print("--- Tokenizing Dataset (TinyBERT) ---")
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Chuẩn hóa format
tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")

dataset_train = tokenized_datasets["train"]
dataset_val = tokenized_datasets["validation"]

config.json:   0%|          | 0.00/409 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

--- Tokenizing Dataset (TinyBERT) ---


Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [3]:
# @title 3. TinyBERT + LoRA Configuration

# 1. Load Base Model
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# 2. Định nghĩa LoRA Config
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=32,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none"
)

# 3. Chèn LoRA vào mô hình
model = get_peft_model(model, lora_config)
model.to(device)

print("\n--- LoRA Efficiency (TinyBERT) ---")
model.print_trainable_parameters()

pytorch_model.bin:   0%|          | 0.00/62.7M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA Efficiency (TinyBERT) ---
trainable params: 40,562 || all params: 14,391,436 || trainable%: 0.2818


In [4]:
# @title 4. Training (LoRA on SST-2)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    probs = softmax(logits, axis=1)[:, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    roc_auc = roc_auc_score(labels, probs)
    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall, 'roc_auc': roc_auc}

training_args = TrainingArguments(
    output_dir='./results_sst2_tinybert_lora',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

print("--- Starting Training (TinyBERT LoRA) ---")
start_train_time = time.time()
trainer.train()
training_time = time.time() - start_train_time

# Save Adapter
trainer.save_model(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

--- Starting Training (TinyBERT LoRA) ---


model.safetensors:   0%|          | 0.00/62.7M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.3932,0.383846,0.837156,0.842222,0.83114,0.853604,0.91689
2,0.3309,0.353268,0.852064,0.858708,0.835821,0.882883,0.931327
3,0.3393,0.33222,0.856651,0.860957,0.850549,0.871622,0.936177


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.3932,0.383846,0.837156,0.842222,0.83114,0.853604,0.91689
2,0.3309,0.353268,0.852064,0.858708,0.835821,0.882883,0.931327
3,0.3393,0.33222,0.856651,0.860957,0.850549,0.871622,0.936177
4,0.331,0.328517,0.860092,0.865934,0.845494,0.887387,0.9392
5,0.3429,0.325859,0.863532,0.868508,0.852495,0.885135,0.9391


('/content/drive/My Drive/SLM_Research/SST2_TinyBERT_LoRA/tokenizer_config.json',
 '/content/drive/My Drive/SLM_Research/SST2_TinyBERT_LoRA/special_tokens_map.json',
 '/content/drive/My Drive/SLM_Research/SST2_TinyBERT_LoRA/vocab.txt',
 '/content/drive/My Drive/SLM_Research/SST2_TinyBERT_LoRA/added_tokens.json',
 '/content/drive/My Drive/SLM_Research/SST2_TinyBERT_LoRA/tokenizer.json')

In [5]:
# @title 5. Final Evaluation (Full Metrics)
print("--- Running Final Evaluation on Validation Set ---")

# 1. Prediction & Latency
start_pred_time = time.time()
predictions_output = trainer.predict(dataset_val)
end_pred_time = time.time()

metrics = predictions_output.metrics
total_samples = len(dataset_val)
latency = ((end_pred_time - start_pred_time) / total_samples) * 1000 # ms/mẫu

# 2. Model Size Check (Adapter Only)
adapter_file = os.path.join(SAVE_PATH, 'adapter_model.safetensors')
if not os.path.exists(adapter_file):
    adapter_file = os.path.join(SAVE_PATH, 'adapter_model.bin')
adapter_size = os.path.getsize(adapter_file) / (1024 * 1024)

# 3. Resource Usage
process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / 1024 ** 2
vram_usage = torch.cuda.memory_allocated() / 1024 ** 2 if torch.cuda.is_available() else 0

# 4. Final Report
print("\n====== REPORT: TinyBERT + LoRA on SST-2 ======")
print(f"1. Classification Metrics:")
print(f"   - Accuracy:  {metrics.get('test_accuracy', 0):.4f}")
print(f"   - Precision: {metrics.get('test_precision', 0):.4f}")
print(f"   - Recall:    {metrics.get('test_recall', 0):.4f}")
print(f"   - F1-Score:  {metrics.get('test_f1', 0):.4f}")
print(f"   - ROC-AUC:   {metrics.get('test_roc_auc', 0):.4f}")

print(f"\n2. Efficiency Metrics:")
print(f"   - Training Time:      {training_time:.2f} s")
print(f"   - Inference Latency:  {latency:.4f} ms/sample")
print(f"   - Adapter Size (MB):  {adapter_size:.4f} MB")
print(f"   - Peak RAM Usage:     {ram_usage:.2f} MB")
print(f"   - Peak VRAM Usage:    {vram_usage:.2f} MB")

# 5. Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC", "Training Time (s)", "Inference Latency (ms)", "Adapter Size (MB)"],
    "Value": [
        metrics.get('test_accuracy', 0),
        metrics.get('test_precision', 0),
        metrics.get('test_recall', 0),
        metrics.get('test_f1', 0),
        metrics.get('test_roc_auc', 0),
        training_time,
        latency,
        adapter_size
    ]
})
results_file = os.path.join(SAVE_PATH, 'sst2_tinybert_lora_results.csv')
results_df.to_csv(results_file, index=False)
print(f"\nReport saved to {results_file}")

--- Running Final Evaluation on Validation Set ---



1. Classification Metrics:
   - Accuracy:  0.8635
   - Precision: 0.8525
   - Recall:    0.8851
   - F1-Score:  0.8685
   - ROC-AUC:   0.9391

2. Efficiency Metrics:
   - Training Time:      2874.72 s
   - Inference Latency:  3.6485 ms/sample
   - Adapter Size (MB):  0.1570 MB
   - Peak RAM Usage:     2513.34 MB
   - Peak VRAM Usage:    72.75 MB

Report saved to /content/drive/My Drive/SLM_Research/SST2_TinyBERT_LoRA/sst2_tinybert_lora_results.csv
