In [8]:
# @title 1. Setup Environment & Load SST-2
!pip install -q transformers datasets evaluate scikit-learn accelerate psutil

import os
import time
import psutil
import torch
import pandas as pd
import numpy as np
from google.colab import drive
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# 1. Mount Drive
drive.mount('/content/drive')
SAVE_PATH = '/content/drive/My Drive/SLM_Research/SST2_DistilBERT_FTT'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load SST-2 Dataset
print("--- Loading SST-2 (Stanford Sentiment Treebank) ---")
dataset = load_dataset("glue", "sst2")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using device: cuda
--- Loading SST-2 (Stanford Sentiment Treebank) ---


In [9]:
# @title 2. Tokenization (DistilBERT - 512)
MODEL_NAME = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    # Sử dụng max_length=512 để đồng bộ với các thực nghiệm trước
    return tokenizer(examples["sentence"], padding="max_length", truncation=True, max_length=512)

print("--- Tokenizing Dataset (DistilBERT) ---")
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Chuẩn hóa format đầu vào cho Trainer
tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")

dataset_train = tokenized_datasets["train"]
dataset_val = tokenized_datasets["validation"]

--- Tokenizing Dataset (DistilBERT) ---


Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [10]:
# @title 3. DistilBERT FTT Training (Fixed Strategy)

# 1. Load Model
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
model.to(device)

# 2. Định nghĩa đầy đủ 5 chỉ số Metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    probs = softmax(logits, axis=1)[:, 1]

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    roc_auc = roc_auc_score(labels, probs)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'roc_auc': roc_auc
    }

# 3. Training Arguments (Golden Config + Fix Parameter Name)
training_args = TrainingArguments(
    output_dir='./results_sst2_distilbert_ftt',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=100,
    eval_strategy="epoch",      # ĐÃ SỬA: Thay cho evaluation_strategy
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

print("--- Starting Training (DistilBERT FTT on SST-2) ---")
start_train_time = time.time()
trainer.train()
training_time = time.time() - start_train_time

# Lưu kết quả
trainer.save_model(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


--- Starting Training (DistilBERT FTT on SST-2) ---


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.1838,0.25419,0.904817,0.905789,0.913043,0.898649,0.967903
2,0.1225,0.368435,0.892202,0.898707,0.86157,0.939189,0.963838
3,0.0934,0.374512,0.892202,0.892938,0.903226,0.882883,0.964477
4,0.0602,0.452019,0.886468,0.887628,0.894737,0.880631,0.962433


('/content/drive/My Drive/SLM_Research/SST2_DistilBERT_FTT/tokenizer_config.json',
 '/content/drive/My Drive/SLM_Research/SST2_DistilBERT_FTT/special_tokens_map.json',
 '/content/drive/My Drive/SLM_Research/SST2_DistilBERT_FTT/vocab.txt',
 '/content/drive/My Drive/SLM_Research/SST2_DistilBERT_FTT/added_tokens.json',
 '/content/drive/My Drive/SLM_Research/SST2_DistilBERT_FTT/tokenizer.json')

In [11]:
# @title 4. Final Evaluation (Full Metrics)
print("--- Running Final Evaluation on Validation Set ---")

# 1. Prediction & Latency
start_pred_time = time.time()
predictions_output = trainer.predict(dataset_val)
end_pred_time = time.time()

metrics = predictions_output.metrics
total_samples = len(dataset_val)
latency = ((end_pred_time - start_pred_time) / total_samples) * 1000 # ms/mẫu

# 2. Model Size & Resource Usage
model_file = os.path.join(SAVE_PATH, 'model.safetensors')
if not os.path.exists(model_file):
    model_file = os.path.join(SAVE_PATH, 'pytorch_model.bin')
model_size = os.path.getsize(model_file) / (1024 * 1024)

process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / 1024 ** 2
vram_usage = torch.cuda.memory_allocated() / 1024 ** 2 if torch.cuda.is_available() else 0

# 3. Report
print("\n====== REPORT: DistilBERT FTT on SST-2 (Length 512) ======")
print(f"1. Classification Metrics:")
print(f"   - Accuracy:  {metrics.get('test_accuracy', 0):.4f}")
print(f"   - Precision: {metrics.get('test_precision', 0):.4f}")
print(f"   - Recall:    {metrics.get('test_recall', 0):.4f}")
print(f"   - F1-Score:  {metrics.get('test_f1', 0):.4f}")
print(f"   - ROC-AUC:   {metrics.get('test_roc_auc', 0):.4f}")

print(f"\n2. Efficiency Metrics:")
print(f"   - Training Time:      {training_time:.2f} s")
print(f"   - Inference Latency:  {latency:.4f} ms/sample")
print(f"   - Model Size:         {model_size:.2f} MB")
print(f"   - Peak RAM Usage:     {ram_usage:.2f} MB")
print(f"   - Peak VRAM Usage:    {vram_usage:.2f} MB")

# 4. Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC", "Training Time (s)", "Inference Latency (ms)", "Model Size (MB)"],
    "Value": [
        metrics.get('test_accuracy', 0),
        metrics.get('test_precision', 0),
        metrics.get('test_recall', 0),
        metrics.get('test_f1', 0),
        metrics.get('test_roc_auc', 0),
        training_time,
        latency,
        model_size
    ]
})
results_file = os.path.join(SAVE_PATH, 'sst2_distilbert_ftt_results.csv')
results_df.to_csv(results_file, index=False)
print(f"\nReport saved to {results_file}")

--- Running Final Evaluation on Validation Set ---



1. Classification Metrics:
   - Accuracy:  0.9048
   - Precision: 0.9130
   - Recall:    0.8986
   - F1-Score:  0.9058
   - ROC-AUC:   0.9679

2. Efficiency Metrics:
   - Training Time:      3195.28 s
   - Inference Latency:  3.4718 ms/sample
   - Model Size:         255.43 MB
   - Peak RAM Usage:     2489.89 MB
   - Peak VRAM Usage:    792.66 MB

Report saved to /content/drive/My Drive/SLM_Research/SST2_DistilBERT_FTT/sst2_distilbert_ftt_results.csv
