In [1]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments, EvalPrediction
from datasets import load_dataset
import evaluate

In [2]:
amazon_test_data = load_dataset('amazon_polarity', split='test').shuffle(seed=42).select(range(25000))

In [3]:
model_distilbert = DistilBertForSequenceClassification.from_pretrained('C:/Boardgames_ABSA/models/distilbert_model')
tokenizer_distilbert = DistilBertTokenizer.from_pretrained('C:/Boardgames_ABSA/models/distilbert_model')

In [5]:
# Tokenizing the test data
def tokenize_function(examples):
    return tokenizer_distilbert(examples['content'], padding='max_length', truncation=True, max_length=512)

In [6]:
tokenized_test_data = amazon_test_data.map(tokenize_function, batched=True)

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

In [7]:
accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")

In [8]:
def compute_metrics(eval_pred: EvalPrediction):
    logits, labels = eval_pred
    logits = torch.tensor(logits)
    labels = torch.tensor(labels)

    predictions = torch.argmax(logits, dim=1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)['accuracy']
    precision = precision_metric.compute(predictions=predictions, references=labels, average='weighted')['precision']
    recall = recall_metric.compute(predictions=predictions, references=labels, average='weighted')['recall']
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')['f1']
    
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}


In [9]:
eval_args = TrainingArguments(
    output_dir='C:/Boardgames_ABSA/results/distilbert_eval',
    per_device_eval_batch_size=32
)

In [10]:
trainer_distilbert = Trainer(
    model=model_distilbert,
    args=eval_args,
    eval_dataset=tokenized_test_data,
    compute_metrics=compute_metrics
)

In [11]:
# Evaluating the model
results_distilbert = trainer_distilbert.evaluate()
print(f"DistilBERT Evaluation Results:\n{results_distilbert}")

  0%|          | 0/782 [00:00<?, ?it/s]

DistilBERT Evaluation Results:
{'eval_loss': 0.22553004324436188, 'eval_accuracy': 0.9414, 'eval_precision': 0.9413999959217612, 'eval_recall': 0.9414, 'eval_f1': 0.9413998759512312, 'eval_runtime': 15692.1403, 'eval_samples_per_second': 1.593, 'eval_steps_per_second': 0.05}
