In [1]:
# @title 1. Clean Install & Fix Dependencies (Final Fix)
# 1. Gỡ bỏ sạch sẽ tất cả các thư viện có thể gây xung đột
!pip uninstall -y transformers adapters huggingface_hub accelerate peft sentence-transformers

# 2. Cài đặt bộ 4 phiên bản tương thích chặt chẽ (Snapshot đầu năm 2024)
# accelerate==0.27.2: Tương thích hoàn hảo với transformers 4.39.3
# huggingface_hub==0.23.0: Sửa lỗi Import url_to_filename
# transformers==4.39.3: Tương thích với adapters 0.2.1
# adapters==0.2.1: Thư viện chuẩn cho Houlsby config
!pip install -q huggingface_hub==0.23.0 transformers==4.39.3 adapters==0.2.1 accelerate==0.27.2 datasets evaluate scikit-learn

import os
import re
import time
import torch
import numpy as np
import pandas as pd
import transformers
import adapters
import accelerate
from google.colab import drive
from datasets import load_dataset

# Import modules
from adapters import AutoAdapterModel, AdapterTrainer
from transformers import AutoTokenizer, TrainingArguments, EarlyStoppingCallback
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# 3. Mount Drive
drive.mount('/content/drive')
SAVE_PATH = '/content/drive/My Drive/SLM_Research/IMDB_DistilBERT_Adapters_Houlsby'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

# Check Versions (Bắt buộc phải khớp bộ này)
print(f"--- Verification ---")
print(f"Transformers version: {transformers.__version__}") # Expect: 4.39.3
print(f"Adapters version: {adapters.__version__}")         # Expect: 0.2.1
print(f"Accelerate version: {accelerate.__version__}")     # Expect: 0.27.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 4. Load Data & Clean
print("--- Loading Stanford IMDB Dataset ---")
dataset = load_dataset("imdb")

def clean_text(example):
    text = example['text']
    text = text.lower()
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'<br\s*/>', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    example['text'] = text
    return example

dataset = dataset.map(clean_text)

Found existing installation: transformers 4.57.3
Uninstalling transformers-4.57.3:
  Successfully uninstalled transformers-4.57.3
[0mFound existing installation: huggingface-hub 0.36.0
Uninstalling huggingface-hub-0.36.0:
  Successfully uninstalled huggingface-hub-0.36.0
Found existing installation: accelerate 1.12.0
Uninstalling accelerate-1.12.0:
  Successfully uninstalled accelerate-1.12.0
Found existing installation: peft 0.18.0
Uninstalling peft-0.18.0:
  Successfully uninstalled peft-0.18.0
Found existing installation: sentence-transformers 5.1.2
Uninstalling sentence-transformers-5.1.2:
  Successfully uninstalled sentence-transformers-5.1.2
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m401.2/401.2 kB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m137.6 MB/s[0m eta 

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [5]:
# @title 2. DistilBERT + Houlsby Adapters (Fixed Device Error)

import os
import time
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, TrainingArguments, EarlyStoppingCallback
from adapters import AutoAdapterModel, AdapterTrainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# 1. Setup Model & Tokenizer
MODEL_NAME = 'distilbert-base-uncased'
print(f"--- Loading Tokenizer: {MODEL_NAME} ---")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

# Tokenize lại nếu cần
if 'dataset' in locals():
    tokenized_datasets = dataset.map(tokenize_function, batched=True)
    tokenized_datasets = tokenized_datasets.remove_columns(["text"])
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch")

    train_val_split = tokenized_datasets["train"].train_test_split(test_size=0.1, seed=42)
    dataset_train = train_val_split["train"]
    dataset_val = train_val_split["test"]
    dataset_test = tokenized_datasets["test"]
else:
    print("Lỗi: Hãy chạy bước Load Data (Clean Install) ở trên trước!")

# 2. Model Setup (Houlsby Config)
print(f"--- Loading Base Model & Injecting Adapters ---")
model = AutoAdapterModel.from_pretrained(MODEL_NAME)
adapter_name = "imdb_houlsby"

# Config "seq_bn" = Houlsby et al., 2019
model.add_adapter(adapter_name, config="seq_bn")
model.add_classification_head(adapter_name, num_labels=2)
model.train_adapter(adapter_name)
model.to(device)

# Hàm tính tham số
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.4f}%"
    )

print("\n--- Houlsby Architecture Efficiency ---")
print_trainable_parameters(model)

# 3. Training Arguments
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    probs = softmax(logits, axis=1)[:, 1]
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    roc_auc = roc_auc_score(labels, probs)
    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall, 'roc_auc': roc_auc}

training_args = TrainingArguments(
    output_dir='./results_distilbert_houlsby',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=100,
    evaluation_strategy="epoch",  # Phiên bản transformers 4.39.3 dùng từ khóa này
    save_strategy="epoch",
    load_best_model_at_end=True,  # Nguyên nhân gây lỗi device (nhưng ta sẽ fix bên dưới)
    metric_for_best_model="accuracy",
    save_total_limit=2,
    fp16=True,
    report_to="none"
)

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

print("--- Starting Training (DistilBERT + Houlsby) ---")
start_train_time = time.time()
trainer.train()
end_train_time = time.time()
training_time = end_train_time - start_train_time
print(f"Training Time: {training_time:.2f}s")

# --- QUAN TRỌNG: FIX LỖI DEVICE ---
# Sau khi load_best_model_at_end, một số layer có thể bị rớt về CPU.
# Ta ép buộc toàn bộ model lên GPU lại lần nữa.
print("Ensuring model is on GPU...")
model.to(device)
# ----------------------------------

# 4. Save & Evaluate
print(f"Saving Adapters to {SAVE_PATH}...")
model.save_adapter(SAVE_PATH, adapter_name)
model.save_head(SAVE_PATH, adapter_name)
tokenizer.save_pretrained(SAVE_PATH)

print("--- Evaluation ---")
start_pred_time = time.time()
metrics = trainer.predict(dataset_test).metrics
end_pred_time = time.time()
latency = ((end_pred_time - start_pred_time) / len(dataset_test)) * 1000

# Calculate Size
adapter_bin = os.path.join(SAVE_PATH, 'pytorch_adapter.bin')
head_bin = os.path.join(SAVE_PATH, 'pytorch_model_head.bin')
size_mb = 0
if os.path.exists(adapter_bin): size_mb += os.path.getsize(adapter_bin) / (1024**2)
if os.path.exists(head_bin): size_mb += os.path.getsize(head_bin) / (1024**2)

print("\n====== REPORT: DistilBERT + Houlsby Adapters ======")
print(f"Accuracy: {metrics['test_accuracy']:.4f}")
print(f"F1: {metrics['test_f1']:.4f}")
print(f"Adapter Size: {size_mb:.2f} MB")
print(f"Inference Latency: {latency:.4f} ms/sample")

# Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "F1", "ROC-AUC", "Training Time (s)", "Inference Latency (ms)", "Adapter Size (MB)"],
    "Value": [metrics['test_accuracy'], metrics['test_f1'], metrics['test_roc_auc'], training_time, latency, size_mb]
})
results_df.to_csv(os.path.join(SAVE_PATH, 'imdb_distilbert_houlsby_results.csv'), index=False)
print("Done!")

--- Loading Tokenizer: distilbert-base-uncased ---




Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

--- Loading Base Model & Injecting Adapters ---

--- Houlsby Architecture Efficiency ---
trainable params: 1662044 || all params: 68024924 || trainable%: 2.4433%
--- Starting Training (DistilBERT + Houlsby) ---


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.2585,0.284655,0.8884,0.881327,0.946984,0.824185,0.96405
2,0.2475,0.254253,0.904,0.900826,0.937231,0.867144,0.967841
3,0.2457,0.232318,0.9112,0.911624,0.912351,0.910899,0.969311
4,0.2004,0.237459,0.9104,0.909385,0.925103,0.894193,0.970401
5,0.1924,0.234707,0.9088,0.90836,0.917953,0.898966,0.970693




Training Time: 1645.72s
Ensuring model is on GPU...
Saving Adapters to /content/drive/My Drive/SLM_Research/IMDB_DistilBERT_Adapters_Houlsby...
--- Evaluation ---



Accuracy: 0.9182
F1: 0.9194
Adapter Size: 3.98 MB
Inference Latency: 5.8364 ms/sample
Done!


In [6]:
# @title 5. Final Evaluation on Test Set (DistilBERT Adapters - Full Metrics)
import os
import time
import psutil
import torch
import pandas as pd
import numpy as np

# Đảm bảo đường dẫn đúng (phải khớp với đường dẫn lúc train)
SAVE_PATH = '/content/drive/My Drive/SLM_Research/IMDB_DistilBERT_Adapters_Houlsby'

print("--- Running Evaluation on Test Set ---")

# Kiểm tra xem trainer đã tồn tại chưa
if 'trainer' not in locals() or 'dataset_test' not in locals():
     raise ValueError("Lỗi: Biến 'trainer' hoặc 'dataset_test' chưa được định nghĩa. Hãy chạy bước Training trước.")

# 1. Classification Metrics
# Đo thời gian suy luận
start_pred_time = time.time()
predictions_output = trainer.predict(dataset_test)
end_pred_time = time.time()

# Lấy các metrics từ trainer (đã được định nghĩa trong compute_metrics lúc train)
metrics = predictions_output.metrics

# 2. Efficiency Metrics
total_samples = len(dataset_test)
total_inference_time = end_pred_time - start_pred_time
latency_per_sample = (total_inference_time / total_samples) * 1000 # ms

# 3. Model Size Check (Adapter + Head Only)
# Thư viện adapters lưu thành 2 file chính: pytorch_adapter.bin và pytorch_model_head.bin
adapter_bin = os.path.join(SAVE_PATH, 'pytorch_adapter.bin')
head_bin = os.path.join(SAVE_PATH, 'pytorch_model_head.bin')
adapter_safe = os.path.join(SAVE_PATH, 'adapter.safetensors') # Đề phòng lưu dạng safetensors

model_size = 0
if os.path.exists(adapter_bin):
    model_size += os.path.getsize(adapter_bin)
if os.path.exists(head_bin):
    model_size += os.path.getsize(head_bin)
if os.path.exists(adapter_safe):
    model_size += os.path.getsize(adapter_safe)

# Đổi sang MB
model_size_mb = model_size / (1024 * 1024)

# Lấy thời gian train từ phiên trước (nếu còn lưu trong biến)
current_training_time = training_time if 'training_time' in locals() else 0.0

# Đo RAM/VRAM hiện tại
process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / 1024 ** 2
vram_usage = torch.cuda.memory_allocated() / 1024 ** 2 if torch.cuda.is_available() else 0

# 4. Report
print("\n====== REPORT: DistilBERT + Houlsby Adapters ======")
print(f"1. Classification Metrics:")
print(f"   - Accuracy:  {metrics.get('test_accuracy', 0):.4f}")
print(f"   - Precision: {metrics.get('test_precision', 0):.4f}")
print(f"   - Recall:    {metrics.get('test_recall', 0):.4f}")
print(f"   - F1-Score:  {metrics.get('test_f1', 0):.4f}")
print(f"   - ROC-AUC:   {metrics.get('test_roc_auc', 0):.4f}")

print(f"\n2. Efficiency Metrics:")
print(f"   - Training Time:      {current_training_time:.2f} s")
print(f"   - Inference Latency:  {latency_per_sample:.4f} ms/sample")
print(f"   - Adapter Size (Disk): {model_size_mb:.2f} MB")
print(f"   - Peak RAM Usage:     {ram_usage:.2f} MB")
print(f"   - Peak VRAM Usage:    {vram_usage:.2f} MB")

# 5. Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC", "Training Time (s)", "Inference Latency (ms)", "Adapter Size (MB)"],
    "Value": [
        metrics.get('test_accuracy', 0),
        metrics.get('test_precision', 0),
        metrics.get('test_recall', 0),
        metrics.get('test_f1', 0),
        metrics.get('test_roc_auc', 0),
        current_training_time,
        latency_per_sample,
        model_size_mb
    ]
})
results_file = os.path.join(SAVE_PATH, 'imdb_distilbert_adapters_results.csv')
results_df.to_csv(results_file, index=False)
print(f"\nReport saved to {results_file}")

--- Running Evaluation on Test Set ---



1. Classification Metrics:
   - Accuracy:  0.9182
   - Precision: 0.9065
   - Recall:    0.9326
   - F1-Score:  0.9194
   - ROC-AUC:   0.9747

2. Efficiency Metrics:
   - Training Time:      1645.72 s
   - Inference Latency:  5.8798 ms/sample
   - Adapter Size (Disk): 3.98 MB
   - Peak RAM Usage:     2818.36 MB
   - Peak VRAM Usage:    605.21 MB

Report saved to /content/drive/My Drive/SLM_Research/IMDB_DistilBERT_Adapters_Houlsby/imdb_distilbert_adapters_results.csv
