In [1]:
from datasets import load_dataset
import evaluate
import numpy
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import DataCollatorWithPadding, EarlyStoppingCallback, RobertaForSequenceClassification, RobertaTokenizer, TrainingArguments, Trainer

In [2]:
def CUDA_CPU():
    if torch.cuda.is_available():
        print("CUDA is available. Using CUDA.")
        return 'cuda'
    else:
        print("CUDA isn't available. Using CPU.")
        return 'cpu'

device = CUDA_CPU()

class DatasetAttr(Dataset):
    def __init__(self, dataset, tokenizer):
        self.dataset = dataset
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        tokenized_input = self.tokenizer(item['Sentence'], padding = "max_length", truncation = True, max_length = 512)
        return {**tokenized_input, "labels": item['Sentiment']}

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim = 1).numpy()

    accuracy = evaluate.load("accuracy")
    f1 = evaluate.load("f1")
    precision = evaluate.load("precision")
    recall = evaluate.load("recall")

    results = {
        "accuracy": accuracy.compute(predictions = predictions, references = labels)["accuracy"],
        "f1": f1.compute(predictions = predictions, references = labels, average = "macro")["f1"],
        "precision": precision.compute(predictions = predictions, references = labels, average = "macro", zero_division = 0)["precision"],
        "recall": recall.compute(predictions = predictions, references = labels, average = "macro", zero_division = 0)["recall"],
    }

    return results

CUDA is available. Using CUDA.


In [3]:
# Load the tokenizer and the saved model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained("./saved_model").to(device)

# Load and prepare your test dataset
dataset = load_dataset('---dataset---')
train_validtest = dataset['train'].train_test_split(test_size = 0.4)
valid_test = train_validtest['train'].train_test_split(test_size = 0.5)
train_set, valid_set, test_set = train_validtest['train'], valid_test['train'], valid_test['test']

test_dataset = DatasetAttr(test_set, tokenizer)

# Setup Training Arguments for Evaluation
training_args = TrainingArguments(
    output_dir = "./results",
    per_device_eval_batch_size = 8,
)

# Initialize the Trainer for evaluation
trainer = Trainer(
    model = model,
    args = training_args,
    compute_metrics = compute_metrics,
)

# Evaluate the model on the test dataset
test_results = trainer.evaluate(test_dataset)
print(f"Model Evaluation Results on Test Dataset: {test_results}")

Model Evaluation Results on Test Dataset: {'eval_loss': 0.7672545909881592, 'eval_accuracy': 0.7992013690815745, 'eval_f1': 0.7304783886158228, 'eval_precision': 0.7337616233010286, 'eval_recall': 0.7277234995361731, 'eval_runtime': 24.3177, 'eval_samples_per_second': 72.088, 'eval_steps_per_second': 9.047}
