In [1]:
import torch
from transformers import AlbertTokenizer, AlbertForSequenceClassification, Trainer, TrainingArguments, EvalPrediction
from datasets import load_dataset
import evaluate

In [2]:
# Only the 'test' split of the IMDB dataset is requested; nothing else is loaded here.
imdb_test_data = load_dataset(
    'imdb',
    split='test',
)

In [7]:
# The classifier and its tokenizer are both read from the same local directory.
tokenizer_albert = AlbertTokenizer.from_pretrained(
    'C:/Boardgames_ABSA/models/albert_model'
)
model_albert = AlbertForSequenceClassification.from_pretrained(
    'C:/Boardgames_ABSA/models/albert_model'
)

In [8]:
def tokenize_function(examples):
    """Tokenize the ``'text'`` entry of ``examples`` with ``tokenizer_albert``.

    Inputs are truncated and padded to a fixed ``max_length`` of 512, so every
    encoded sequence has the same length.

    Args:
        examples: Mapping that exposes the raw text under the ``'text'`` key.

    Returns:
        Whatever ``tokenizer_albert`` returns for the given text.
    """
    texts = examples['text']
    encoded = tokenizer_albert(
        texts,
        truncation=True,
        padding='max_length',
        max_length=512,
    )
    return encoded

In [9]:
# batched=True hands tokenize_function groups of examples rather than one at a time.
tokenized_test_data = imdb_test_data.map(
    tokenize_function,
    batched=True,
)

In [10]:
# Load each metric once, in a fixed order, and bind them to module-level names
# that compute_metrics looks up when it runs.
accuracy_metric, precision_metric, recall_metric, f1_metric = (
    evaluate.load(metric_name)
    for metric_name in ("accuracy", "precision", "recall", "f1")
)

In [11]:
def compute_metrics(eval_pred: EvalPrediction):
    """Convert raw evaluation outputs into accuracy, precision, recall and F1.

    Args:
        eval_pred: Object that unpacks into ``(logits, labels)``.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``. Precision, recall and F1 are computed with
        ``average='weighted'``.
    """
    raw_logits, raw_labels = eval_pred
    logit_tensor = torch.tensor(raw_logits)
    references = torch.tensor(raw_labels)

    # The predicted class is the index of the largest logit along dim 1.
    predictions = logit_tensor.argmax(dim=1)

    # Every metric is scored on the same predictions/references pair.
    shared_inputs = {'predictions': predictions, 'references': references}

    scores = {'accuracy': accuracy_metric.compute(**shared_inputs)['accuracy']}
    weighted_metrics = (
        ('precision', precision_metric),
        ('recall', recall_metric),
        ('f1', f1_metric),
    )
    for key, metric in weighted_metrics:
        scores[key] = metric.compute(**shared_inputs, average='weighted')[key]

    return scores

In [12]:
# Only two settings are overridden: the output directory and the
# per-device evaluation batch size.
eval_args = TrainingArguments(
    per_device_eval_batch_size=32,
    output_dir='C:/Boardgames_ABSA/results/albert_eval',
)

In [13]:
# No training dataset is passed; this Trainer is given only the tokenized
# test split and the metric callback.
trainer_albert = Trainer(
    args=eval_args,
    model=model_albert,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_test_data,
)

In [14]:
# Run evaluation on the dataset attached to the trainer, keep the returned
# results in results_albert, and print them.
results_albert = trainer_albert.evaluate()
print(
    f"ALBERT Evaluation Results:\n{results_albert}"
)

  0%|          | 0/782 [00:00<?, ?it/s]

ALBERT Evaluation Results:
{'eval_loss': 0.2727893888950348, 'eval_accuracy': 0.93916, 'eval_precision': 0.9391685023022046, 'eval_recall': 0.93916, 'eval_f1': 0.9391597055329748, 'eval_runtime': 27056.0773, 'eval_samples_per_second': 0.924, 'eval_steps_per_second': 0.029}
