In [1]:
# @title 1. Setup Environment
!pip install -q transformers peft datasets evaluate scikit-learn accelerate psutil

import os
import re
import time
import psutil
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax
from google.colab import drive

# Mount Drive
drive.mount('/content/drive')
SAVE_PATH = '/content/drive/My Drive/SLM_Research/IMDB_TinyBERT_LoRA'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Using device: cuda


In [2]:
# @title 2. Data Loading & Tokenization
print("--- Loading Stanford IMDB Dataset ---")
dataset = load_dataset("imdb")

def clean_text(example):
    text = example['text']
    text = text.lower()
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'<br\s*/>', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    example['text'] = text
    return example

dataset = dataset.map(clean_text)

# TinyBERT Model Name
MODEL_NAME = 'huawei-noah/TinyBERT_General_4L_312D'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(["text"])
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")

# Splits
train_val_split = tokenized_datasets["train"].train_test_split(test_size=0.1, seed=42)
dataset_train = train_val_split["train"]
dataset_val = train_val_split["test"]
dataset_test = tokenized_datasets["test"]

print("Data ready for TinyBERT.")

--- Loading Stanford IMDB Dataset ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

plain_text/unsupervised-00000-of-00001.p(…):   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/409 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Data ready for TinyBERT.


In [3]:
# @title 3. TinyBERT + LoRA Configuration

# 1. Load Base Model
print(f"--- Loading Base Model: {MODEL_NAME} ---")
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# 2. Define LoRA Config
# TinyBERT có kiến trúc BERT, các lớp attention là query, key, value.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=32,
    target_modules=["query", "value"], # Target các lớp attention chính
    lora_dropout=0.1,
    bias="none"
)

# 3. Inject LoRA
model = get_peft_model(model, lora_config)
model.to(device)

print("\n--- LoRA Efficiency (TinyBERT) ---")
model.print_trainable_parameters()

--- Loading Base Model: huawei-noah/TinyBERT_General_4L_312D ---


pytorch_model.bin:   0%|          | 0.00/62.7M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA Efficiency (TinyBERT) ---
trainable params: 40,562 || all params: 14,391,436 || trainable%: 0.2818


In [4]:
# @title 4. Training (LoRA)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    probs = softmax(logits, axis=1)[:, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    roc_auc = roc_auc_score(labels, probs)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'roc_auc': roc_auc
    }

training_args = TrainingArguments(
    output_dir='./results_tinybert_lora',
    num_train_epochs=5,
    per_device_train_batch_size=32, # TinyBERT nhỏ nên có thể tăng batch size
    per_device_eval_batch_size=64,
    learning_rate=5e-5,              # Golden Config
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=2,
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

print("--- Starting Training (TinyBERT + LoRA) ---")
start_train_time = time.time()
trainer.train()
training_time = time.time() - start_train_time

# Fix lỗi device sau khi load best model
model.to(device)

# Save
trainer.save_model(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

--- Starting Training (TinyBERT + LoRA) ---


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.578,0.548866,0.7992,0.776292,0.882472,0.69292,0.875335
2,0.4805,0.451374,0.8268,0.815823,0.8766,0.762928,0.913457
3,0.4614,0.410476,0.8412,0.836558,0.866894,0.808274,0.921225
4,0.4246,0.394357,0.844,0.840426,0.865206,0.817025,0.923564
5,0.409,0.391963,0.8432,0.837748,0.873167,0.805091,0.924065


('/content/drive/My Drive/SLM_Research/IMDB_TinyBERT_LoRA/tokenizer_config.json',
 '/content/drive/My Drive/SLM_Research/IMDB_TinyBERT_LoRA/special_tokens_map.json',
 '/content/drive/My Drive/SLM_Research/IMDB_TinyBERT_LoRA/vocab.txt',
 '/content/drive/My Drive/SLM_Research/IMDB_TinyBERT_LoRA/added_tokens.json',
 '/content/drive/My Drive/SLM_Research/IMDB_TinyBERT_LoRA/tokenizer.json')

In [5]:
# @title 5. Final Evaluation (Full Metrics)
print("--- Running Evaluation on Test Set ---")

# 1. Classification Metrics & Latency
start_pred_time = time.time()
predictions_output = trainer.predict(dataset_test)
end_pred_time = time.time()

metrics = predictions_output.metrics
total_samples = len(dataset_test)
latency = ((end_pred_time - start_pred_time) / total_samples) * 1000

# 2. Model Size Check (Adapter Only)
adapter_safe = os.path.join(SAVE_PATH, 'adapter_model.safetensors')
adapter_bin = os.path.join(SAVE_PATH, 'adapter_model.bin')

if os.path.exists(adapter_safe):
    model_size = os.path.getsize(adapter_safe) / (1024 * 1024)
elif os.path.exists(adapter_bin):
    model_size = os.path.getsize(adapter_bin) / (1024 * 1024)
else:
    model_size = 0

# 3. Resource Usage
process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / 1024 ** 2
vram_usage = torch.cuda.memory_allocated() / 1024 ** 2 if torch.cuda.is_available() else 0

# 4. Final Report
print("\n====== REPORT: TinyBERT + LoRA on IMDB ======")
print(f"1. Classification Metrics:")
print(f"   - Accuracy:  {metrics.get('test_accuracy', 0):.4f}")
print(f"   - Precision: {metrics.get('test_precision', 0):.4f}")
print(f"   - Recall:    {metrics.get('test_recall', 0):.4f}")
print(f"   - F1-Score:  {metrics.get('test_f1', 0):.4f}")
print(f"   - ROC-AUC:   {metrics.get('test_roc_auc', 0):.4f}")

print(f"\n2. Efficiency Metrics:")
print(f"   - Training Time:      {training_time:.2f} s")
print(f"   - Inference Latency:  {latency:.4f} ms/sample")
print(f"   - Adapter Size:       {model_size:.4f} MB")
print(f"   - Peak RAM Usage:     {ram_usage:.2f} MB")
print(f"   - Peak VRAM Usage:    {vram_usage:.2f} MB")

# 5. Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC", "Training Time (s)", "Inference Latency (ms)", "Adapter Size (MB)"],
    "Value": [
        metrics.get('test_accuracy', 0),
        metrics.get('test_precision', 0),
        metrics.get('test_recall', 0),
        metrics.get('test_f1', 0),
        metrics.get('test_roc_auc', 0),
        training_time,
        latency,
        model_size
    ]
})
results_file = os.path.join(SAVE_PATH, 'imdb_tinybert_lora_results.csv')
results_df.to_csv(results_file, index=False)
print(f"\nReport saved to {results_file}")

--- Running Evaluation on Test Set ---



1. Classification Metrics:
   - Accuracy:  0.8457
   - Precision: 0.8569
   - Recall:    0.8300
   - F1-Score:  0.8433
   - ROC-AUC:   0.9238

2. Efficiency Metrics:
   - Training Time:      971.56 s
   - Inference Latency:  3.5872 ms/sample
   - Adapter Size:       0.1570 MB
   - Peak RAM Usage:     2686.55 MB
   - Peak VRAM Usage:    72.75 MB

Report saved to /content/drive/My Drive/SLM_Research/IMDB_TinyBERT_LoRA/imdb_tinybert_lora_results.csv
