In [1]:
# @title 1. Setup Environment & Load SST-2
!pip install -q transformers datasets evaluate scikit-learn accelerate psutil

import os
import time
import psutil
import torch
import pandas as pd
import numpy as np
from google.colab import drive
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from scipy.special import softmax

# 1. Mount Drive
drive.mount('/content/drive')
SAVE_PATH = '/content/drive/My Drive/SLM_Research/SST2_ALBERT_FTT'
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load SST-2 Dataset
print("--- Loading SST-2 ---")
dataset = load_dataset("glue", "sst2")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Using device: cuda
--- Loading SST-2 ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

sst2/train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

sst2/validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

sst2/test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [2]:
# @title 2. Tokenization (ALBERT - 512)
MODEL_NAME = 'albert-base-v2'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", truncation=True, max_length=512)

print("--- Tokenizing Dataset ---")
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Chuẩn hóa format
tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")

dataset_train = tokenized_datasets["train"]
dataset_val = tokenized_datasets["validation"]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

--- Tokenizing Dataset ---


Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [3]:
# @title 3. ALBERT Full Fine-Tuning Training (Fixed)

# 1. Load Model
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
model.to(device)

# 2. Metrics Definition
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    probs = softmax(logits, axis=1)[:, 1]
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    roc_auc = roc_auc_score(labels, probs)
    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall, 'roc_auc': roc_auc}

# 3. Training Arguments (Golden Config + Fix Parameter Name)
training_args = TrainingArguments(
    output_dir='./results_sst2_albert_ftt',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=100,
    eval_strategy="epoch",      # ĐÃ SỬA: evaluation_strategy -> eval_strategy
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

print("--- Starting Training ALBERT ---")
start_train_time = time.time()
trainer.train()
training_time = time.time() - start_train_time

# Save
trainer.save_model(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


--- Starting Training ALBERT ---


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.2678,0.388793,0.831422,0.816936,0.913649,0.738739,0.919979
2,0.1977,0.391083,0.866972,0.875,0.838843,0.914414,0.943944
3,0.16,0.417998,0.863532,0.871351,0.837838,0.907658,0.938113
4,0.1527,0.512353,0.875,0.873696,0.899761,0.849099,0.940565
5,0.0825,0.639374,0.880734,0.882883,0.882883,0.882883,0.935779


('/content/drive/My Drive/SLM_Research/SST2_ALBERT_FTT/tokenizer_config.json',
 '/content/drive/My Drive/SLM_Research/SST2_ALBERT_FTT/special_tokens_map.json',
 '/content/drive/My Drive/SLM_Research/SST2_ALBERT_FTT/spiece.model',
 '/content/drive/My Drive/SLM_Research/SST2_ALBERT_FTT/added_tokens.json',
 '/content/drive/My Drive/SLM_Research/SST2_ALBERT_FTT/tokenizer.json')

In [4]:
# @title 4. Final Evaluation (ALBERT - SST-2)
print("--- Running Final Evaluation ---")

# 1. Prediction
start_pred_time = time.time()
predictions_output = trainer.predict(dataset_val)
end_pred_time = time.time()

metrics = predictions_output.metrics
latency = ((end_pred_time - start_pred_time) / len(dataset_val)) * 1000

# 2. Resource & Size
model_file = os.path.join(SAVE_PATH, 'model.safetensors')
if not os.path.exists(model_file): model_file = os.path.join(SAVE_PATH, 'pytorch_model.bin')
model_size = os.path.getsize(model_file) / (1024 * 1024)

process = psutil.Process(os.getpid())
ram_usage = process.memory_info().rss / 1024 ** 2
vram_usage = torch.cuda.memory_allocated() / 1024 ** 2 if torch.cuda.is_available() else 0

# 3. Report
print("\n====== REPORT: ALBERT FTT on SST-2 ======")
print(f"Accuracy:  {metrics.get('test_accuracy', 0):.4f}")
print(f"Precision: {metrics.get('test_precision', 0):.4f}")
print(f"Recall:    {metrics.get('test_recall', 0):.4f}")
print(f"F1-Score:  {metrics.get('test_f1', 0):.4f}")
print(f"ROC-AUC:   {metrics.get('test_roc_auc', 0):.4f}")
print(f"\nInference Latency: {latency:.4f} ms/sample")
print(f"Model Size:        {model_size:.2f} MB")

# Save CSV
results_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1", "ROC-AUC", "Training Time (s)", "Inference Latency (ms)", "Model Size (MB)"],
    "Value": [metrics['test_accuracy'], metrics['test_precision'], metrics['test_recall'], metrics['test_f1'], metrics['test_roc_auc'], training_time, latency, model_size]
})
results_df.to_csv(os.path.join(SAVE_PATH, 'sst2_albert_ftt_results.csv'), index=False)

--- Running Final Evaluation ---



Accuracy:  0.8807
Precision: 0.8829
Recall:    0.8829
F1-Score:  0.8829
ROC-AUC:   0.9358

Inference Latency: 11.7899 ms/sample
Model Size:        44.58 MB
