## Imports

In [29]:
from transformers import RobertaForSequenceClassification, RobertaTokenizer
from dataset_extract_transform.preprocess import Loader
from training.tokenizer import Tokenizer
import torch
import numpy as np

## Get Testing Dataset

In [52]:
# Location of the labeled tweet CSV consumed by the project's Loader.
DATASET_PATH = './offensive_tweet_dataset/labeled_data.csv'

# The Loader is driven in three explicit steps; the order matters because
# each call operates on the state left by the previous one.
loader = Loader(DATASET_PATH)
loader.load()
loader.preprocess()

# get_testing_data() returns a pair: the test inputs and their labels.
testing_data = loader.get_testing_data()
test_set, test_labels = testing_data

In [53]:
# Wrap the test inputs and labels in the project's Tokenizer and ask it for
# a data loader (the argument 32 is presumably the batch size -- confirm
# against training.tokenizer).
tokenizer = Tokenizer(test_set, test_labels)
dataloader = tokenizer.data_loader(32)

# Load the 3-label sequence classifier from the local 'trained_models'
# directory only (local_files_only=True).
model = RobertaForSequenceClassification.from_pretrained('trained_models', local_files_only=True, num_labels=3)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Switch to evaluation mode before running inference.
model.eval()

# Per-batch prediction arrays; joined once after the loop instead of calling
# np.append on every iteration, which re-copies the whole array each time.
batch_predictions = []

# Inference only: no_grad() stops autograd from recording the forward pass.
with torch.no_grad():
    for batch in dataloader:
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)

        # batch[2] (the labels) is intentionally not passed to the model:
        # it was only used to compute a loss that was never read.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        # Highest-scoring class per row. .cpu() is required before .numpy()
        # because a CUDA tensor cannot be converted to a NumPy array directly.
        batch_predictions.append(outputs.logits.argmax(dim=1).cpu().numpy())

# Integer class ids, one per evaluated example (empty array if the loader
# yielded no batches, since np.concatenate rejects an empty list).
if batch_predictions:
    predictions = np.concatenate(batch_predictions)
else:
    predictions = np.array([], dtype=np.int64)

# Sanity check: exactly one prediction per test label.
print(predictions.size == test_labels.size)

  torch.tensor(self.tokens['input_ids']),
  torch.tensor(self.tokens['attention_mask']),


True


## Model Accuracy

In [54]:
# Compare each prediction with its label pairwise; zip stops at the shorter
# of the two sequences. bool() mirrors the truthiness test of an `if`.
outcomes = [bool(pred == label) for pred, label in zip(predictions, test_labels)]

# Tally correct and incorrect predictions as plain Python ints.
true_count = outcomes.count(True)
false_count = outcomes.count(False)

# Accuracy = correct / total compared pairs.
print(true_count / (true_count + false_count))

0.7730482146459552
