In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from dataset.dataset import create_dataset

# Single source of truth for the mini-batch size and data location, so the
# dataset factory and all three DataLoaders cannot drift apart.
BATCH_SIZE = 2
DATA_PATH = 'dataset/preprocessed_hotel'

# create_dataset returns the train / validation / test splits, in that order.
train_dataset, val_dataset, test_dataset = create_dataset(data_path=DATA_PATH,
                                                          batch_size=BATCH_SIZE)

# Create DataLoaders
# Only the training split is shuffled; validation and test keep their order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Train Dataset: 3000
Val Dataset: 2000
Test Dataset: 600


In [2]:
# Import Model
from models.roberta_model import ASBA_PhoBertCustomModel

# Constructor arguments gathered in one place so they are easy to read and tweak.
# NOTE(review): num_epochs_freeze / unfreeze_steps presumably drive a backbone
# freezing schedule -- confirm against models/roberta_model.py.
model_kwargs = dict(
    roberta_version='vinai/phobert-base-v2',
    num_labels=train_dataset.num_labels(),  # label count is read from the training split
    num_epochs_freeze=2,
    unfreeze_steps=1,
)
model = ASBA_PhoBertCustomModel(**model_kwargs)

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/phobert-base-v2 and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using device: cpu
Train Only Classifier Layer


In [None]:
from torch import nn

# Switch the model to evaluation mode before running it on a sample batch.
model.eval()

# Pull a single mini-batch from the training loader. The loader shuffles, so
# the batch differs between runs unless a random seed is fixed beforehand.
batch_iterator = iter(train_dataloader)
batch = next(batch_iterator)

# Loss function applied to the model output in the next cell.
criterion = nn.CrossEntropyLoss()

In [23]:
# Run the batch on whichever device the model's weights live on instead of a
# hard-coded 'cpu', so inputs and weights never end up on different devices.
# NOTE(review): assumes `model` is a torch.nn.Module exposing .parameters() -- confirm.
device = next(model.parameters()).device

input_ids, attention_mask = batch['input_ids'].to(device), batch['attention_mask'].to(device)
labels = batch['labels'].to(device)

# Evaluation only: disable gradient tracking so no autograd graph is built
# for a forward pass that is never back-propagated.
with torch.no_grad():
    output = model(input_ids=input_ids, attention_mask=attention_mask)
    # CrossEntropyLoss expects the class dimension at index 1; .mT swaps the
    # last two dimensions of the output to put it there.
    # NOTE(review): presumably output is (batch, aspects, classes) and labels
    # is (batch, aspects) -- confirm against the model's forward().
    loss = criterion(output.mT, labels)

In [25]:
# Predicted class per position = index of the largest score along the last axis,
# flattened to a 1-D NumPy array.
predicted_classes = output.argmax(dim=-1)
prediction = predicted_classes.cpu().numpy().ravel()

# Ground-truth labels flattened the same way so both arrays align element-wise.
true = labels.cpu().numpy().ravel()

prediction, true

(array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3], dtype=int64),
 array([1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
        0, 3], dtype=int64))

In [31]:
# Aspect Identification Metrics

from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Label 0 stands for 'None' (no aspect); any other label counts as an aspect.
# Both arrays become boolean "is an aspect" masks.
aspect_true, aspect_pred = (true != 0), (prediction != 0)

acc = accuracy_score(aspect_true, aspect_pred)

# Macro-averaged over the two mask values; undefined ratios are reported as 0.
aspect_prf = precision_recall_fscore_support(
    aspect_true,
    aspect_pred,
    average='macro',
    zero_division=0,
)
aspect_precision, aspect_recall, aspect_f1, _ = aspect_prf

print(f"Aspect Identification - Accuracy: {acc}, Precision: {aspect_precision}, Recall: {aspect_recall}, F1-Score: {aspect_f1}")

Aspect Identification - Accuracy: 0.14705882352941177, Precision: 0.07352941176470588, Recall: 0.5, F1-Score: 0.1282051282051282


In [28]:
# Positions where an aspect is both present in the ground truth and predicted
# by the model (element-wise AND of the two boolean masks).
correct_aspects = aspect_pred & aspect_true
correct_aspects

array([ True, False,  True, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False,  True, False, False, False,
       False,  True, False, False, False, False, False, False,  True,
       False, False, False, False,  True])

In [32]:
# Sentiment Classification Metrics (only for correctly identified aspects)
# Keep only the positions where an aspect exists and the model also predicted one.
correct_aspects = aspect_true & aspect_pred

if correct_aspects.any():
    sentiment_true = true[correct_aspects]
    sentiment_pred = prediction[correct_aspects]

    sentiment_accuracy = accuracy_score(sentiment_true, sentiment_pred)
    # Macro-averaged over the labels present; undefined ratios are reported as 0.
    sentiment_precision, sentiment_recall, sentiment_f1, _ = precision_recall_fscore_support(
        sentiment_true, sentiment_pred, average='macro', zero_division=0)
else:
    # No aspect was correctly identified (e.g. the model predicted 'None'
    # everywhere), so there is nothing to score. Report NaN rather than
    # calling the sklearn metrics on an empty selection.
    sentiment_accuracy = sentiment_precision = sentiment_recall = sentiment_f1 = float('nan')

print(f"Sentiment Classification - Accuracy: {sentiment_accuracy}, Precision: {sentiment_precision}, Recall: {sentiment_recall}, F1-Score: {sentiment_f1}")

Sentiment Classification - Accuracy: 0.3, Precision: 0.09999999999999999, Recall: 0.3333333333333333, F1-Score: 0.15384615384615385
