In [None]:
# Mount Google Drive into this Colab runtime at /content/drive.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset, DatasetDict
import torch
import matplotlib.pyplot as plt
# Disable Weights & Biases (wandb) logging.
# This must run before any TrainingArguments/Trainer object is created.
# NOTE(review): the training output captured in this notebook warns that the
# WANDB_DISABLED environment variable is deprecated (removal announced for v5)
# and recommends the `report_to` option (e.g. "none") instead — consider migrating.
import os
os.environ["WANDB_DISABLED"] = "true"

In [None]:
# Read file from Google Colab upload
raw_df = pd.read_excel("input_file")

# Class letter in the "Phân loại ESG" column -> integer id used by the classifiers.
LABEL_TO_ID = {"S": 1, "U": 0}

# Keep only the sentence and its class, drop incomplete rows, and expose the
# sentence under the column name "text".
df = raw_df[["Câu", "Phân loại ESG"]].dropna().rename(columns={"Câu": "text"})

# Tokenizers need plain strings; a numeric-looking cell would otherwise be read as a number.
df["text"] = df["text"].astype(str)

# Strip stray whitespace before mapping so values such as "S " are not lost.
df["label"] = df["Phân loại ESG"].astype(str).str.strip().map(LABEL_TO_ID)

# Anything outside LABEL_TO_ID maps to NaN. Left in place it would turn the label
# column into floats and break the stratified split / training further down, so
# report those rows and drop them here.
unmapped = df["label"].isna()
if unmapped.any():
    unexpected = sorted(df.loc[unmapped, "Phân loại ESG"].astype(str).unique())
    print(f"⚠️ Dropping {int(unmapped.sum())} rows with unexpected labels: {unexpected}")
    df = df.loc[~unmapped]

if df.empty:
    raise ValueError("No rows with a valid 'S'/'U' label were found in the input file.")

# Integer class ids (not floats) are what the classification head expects.
df = df.astype({"label": int})

In [None]:
# Stratified split in two passes: hold out 40% of the rows, then cut that
# hold-out in half to obtain the validation and test parts. Both passes use
# random_state=42 so the partition is repeatable.
train_texts, temp_texts, train_labels, temp_labels = train_test_split(
    df["text"],
    df["label"],
    test_size=0.4,
    stratify=df["label"],
    random_state=42,
)
val_texts, test_texts, val_labels, test_labels = train_test_split(
    temp_texts,
    temp_labels,
    test_size=0.5,
    stratify=temp_labels,
    random_state=42,
)


def _to_dataset(texts, labels):
    """Build a Dataset with "text" and "label" columns from two list-convertible sequences."""
    return Dataset.from_dict({"text": texts.tolist(), "label": labels.tolist()})


train_ds = _to_dataset(train_texts, train_labels)
val_ds = _to_dataset(val_texts, val_labels)
test_ds = _to_dataset(test_texts, test_labels)

# Single container holding the three splits under fixed keys.
dataset = DatasetDict({"train": train_ds, "validation": val_ds, "test": test_ds})


In [None]:
# Checkpoints to compare, keyed by the short nickname used throughout the notebook.
model_names = dict(
    phobert="vinai/phobert-base",
    vibert="FPTAI/vibert-base-cased",
    xlmr="xlm-roberta-base",
)

# Per-nickname registries filled by the loop below.
models, tokenizers, tokenized_datasets = {}, {}, {}

for name, model_path in model_names.items():
    print(f"🔃 Loading: {name}")
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
    model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)

    def tokenize_batch(batch):
        """Tokenize a batch of texts, truncated/padded to exactly 128 tokens."""
        return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128)

    # `map` runs right away, so `tokenize_batch` sees this iteration's tokenizer.
    encoded = dataset.map(tokenize_batch, batched=True)

    models[name], tokenizers[name], tokenized_datasets[name] = model, tokenizer, encoded


🔃 Đang load: phobert


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/557 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.13M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Map:   0%|          | 0/7075 [00:00<?, ? examples/s]

Map:   0%|          | 0/2358 [00:00<?, ? examples/s]

Map:   0%|          | 0/2359 [00:00<?, ? examples/s]

🔃 Đang load: vibert


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/255k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/581M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at FPTAI/vibert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/581M [00:00<?, ?B/s]

Map:   0%|          | 0/7075 [00:00<?, ? examples/s]

Map:   0%|          | 0/2358 [00:00<?, ? examples/s]

Map:   0%|          | 0/2359 [00:00<?, ? examples/s]

🔃 Đang load: xlmr


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7075 [00:00<?, ? examples/s]

Map:   0%|          | 0/2358 [00:00<?, ? examples/s]

Map:   0%|          | 0/2359 [00:00<?, ? examples/s]

In [None]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score

def compute_metrics(eval_pred):
    """Turn one evaluation pass into a dict of classification scores.

    Args:
        eval_pred: A pair ``(logits, labels)``. ``logits`` may itself be a
            tuple, in which case only its first element is used.

    Returns:
        dict with the keys "accuracy", "f1", "recall" and "precision", each
        computed by the matching scikit-learn scorer with its default settings
        on the gold labels and the arg-max predictions.
    """
    raw_outputs, gold = eval_pred
    # Some models return tuples; the class scores are the first entry.
    scores = raw_outputs[0] if isinstance(raw_outputs, tuple) else raw_outputs
    # Predicted class = index of the largest score along axis 1.
    predicted = torch.tensor(scores).argmax(dim=1).numpy()

    scorers = {
        "accuracy": accuracy_score,
        "f1": f1_score,
        "recall": recall_score,
        "precision": precision_score,
    }
    return {metric: scorer(gold, predicted) for metric, scorer in scorers.items()}

In [None]:
# Fine-tune every loaded model with identical hyper-parameters.
# trainers[name]  -> the Trainer that fine-tuned `name` (reused for test-set prediction)
# histories[name] -> that Trainer's log history (training-loss and evaluation records)
trainers = {}
histories = {}

for name, model in models.items():
    print(f"🏁 Training {name}")
    run_dir = f"output_folder/Model_{name}"
    args = TrainingArguments(
        output_dir=run_dir,
        per_device_train_batch_size=16,
        num_train_epochs=10,
        learning_rate=2e-5,
        logging_dir=run_dir,
        logging_steps=500,
        # Score the validation split after every epoch; without an evaluation
        # strategy the eval_dataset / compute_metrics passed below are never used.
        # NOTE(review): on transformers releases older than 4.41 this argument
        # is spelled `evaluation_strategy` — confirm the installed version.
        eval_strategy="epoch",
        # Checkpoint on the same schedule so the best epoch can be restored,
        # and keep only the two most relevant checkpoints to bound disk usage.
        save_strategy="epoch",
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="f1",  # the "f1" key returned by compute_metrics
        greater_is_better=True,
        # Explicitly turn off external loggers (wandb etc.); this is the supported
        # replacement for the deprecated WANDB_DISABLED environment variable.
        report_to="none",
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tokenized_datasets[name]["train"],
        eval_dataset=tokenized_datasets[name]["validation"],
        # NOTE(review): recent transformers releases appear to prefer
        # `processing_class=` over `tokenizer=` — confirm before switching.
        tokenizer=tokenizers[name],
        compute_metrics=compute_metrics,
    )
    trainer.train()
    trainers[name] = trainer
    histories[name] = trainer.state.log_history


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


🏁 Huấn luyện phobert


  trainer = Trainer(


Step,Training Loss
500,0.5451
1000,0.4017
1500,0.3237
2000,0.2747
2500,0.2508
3000,0.2109
3500,0.1982
4000,0.1718


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


🏁 Huấn luyện vibert


  trainer = Trainer(


Step,Training Loss
500,0.5359
1000,0.4206
1500,0.358
2000,0.3165
2500,0.3006
3000,0.2827
3500,0.2652
4000,0.2537


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


🏁 Huấn luyện xlmr


  trainer = Trainer(


Step,Training Loss
500,0.6394
1000,0.4896
1500,0.3776
2000,0.3054
2500,0.2613
3000,0.2264
3500,0.1993
4000,0.1727


In [None]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score

# `compute_metrics` is defined once, in the metrics cell above, and is already
# attached to every Trainer; it is deliberately not redefined in this cell.

# Class names indexed by label id, matching the mapping used when the data was
# loaded ({"S": 1, "U": 0}).
CLASS_IDS = [0, 1]
CLASS_NAMES = ["U", "S"]

# 👉 Evaluate all trained models
# Iterate over `trainers` (not `models`) so a model that was never trained
# cannot trigger a KeyError here.
for name, trainer in trainers.items():
    print(f"\n📋 Evaluation for model: {name}")

    # Predict on test set
    preds_output = trainer.predict(tokenized_datasets[name]["test"])
    logits = preds_output.predictions
    if isinstance(logits, tuple):  # Some models return tuples
        logits = logits[0]

    # Predicted class id = index of the largest score along axis 1.
    preds = logits.argmax(axis=1)
    labels = preds_output.label_ids

    # Print classification table with readable class names; passing the label
    # ids explicitly keeps both rows even if a class is missing from the predictions.
    print(
        classification_report(
            labels,
            preds,
            labels=CLASS_IDS,
            target_names=CLASS_NAMES,
            digits=4,
        )
    )



📋 Evaluation for model: phobert


              precision    recall  f1-score   support

           0     0.8588    0.7831    0.8192       862
           1     0.8811    0.9259    0.9029      1497

    accuracy                         0.8737      2359
   macro avg     0.8699    0.8545    0.8611      2359
weighted avg     0.8730    0.8737    0.8723      2359


📋 Evaluation for model: vibert


              precision    recall  f1-score   support

           0     0.8984    0.5742    0.7006       862
           1     0.7970    0.9626    0.8720      1497

    accuracy                         0.8207      2359
   macro avg     0.8477    0.7684    0.7863      2359
weighted avg     0.8340    0.8207    0.8094      2359


📋 Evaluation for model: xlmr


              precision    recall  f1-score   support

           0     0.8729    0.7970    0.8332       862
           1     0.8887    0.9332    0.9104      1497

    accuracy                         0.8834      2359
   macro avg     0.8808    0.8651    0.8718      2359
weighted avg     0.8829    0.8834    0.8822      2359



In [None]:
def predict_sentence(model, tokenizer, sentence, max_length=128):
    """Classify one sentence (or a list of sentences) as "S" or "U".

    The model is moved to CUDA when available (otherwise CPU) and is switched
    to evaluation mode for the forward pass so that dropout cannot make the
    prediction random; its previous train/eval mode is restored afterwards.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Callable tokenizer returning a mapping of tensors when
            called with ``return_tensors="pt"``.
        sentence: A single string, or a list of strings for batch prediction.
        max_length: Length every input is truncated/padded to (default 128).

    Returns:
        ``(pred, probs)`` where ``probs`` is the softmax over dim 1 of the
        logits (left on the inference device) and ``pred`` is "S" when class 1
        has the highest probability, otherwise "U". For a list input, ``pred``
        is a list with one label per sentence.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    is_single = isinstance(sentence, str)
    inputs = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}  # move input to same device as model

    was_training = model.training
    model.eval()  # disable dropout etc. for a deterministic forward pass
    try:
        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=1)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    # Arg-max per row (not over the flattened tensor) so batches are handled correctly.
    labels = ["S" if idx == 1 else "U" for idx in probs.argmax(dim=1).tolist()]
    pred = labels[0] if is_single else labels
    return pred, probs
