In [6]:
#Add libraries for data handling
import pandas as pd
import numpy as np

#add libraries for BERT classification and evaluation
from transformers import AutoTokenizer
from sklearn.metrics import (accuracy_score, recall_score, precision_score, f1_score,
                            classification_report,confusion_matrix)

from transformers import TrainingArguments, Trainer
import transformers as tf
import torch
  
# Hugging Face Hub checkpoint used for both the tokenizer and the classifier;
# naming it once keeps the two from drifting apart.
MODEL_CHECKPOINT = "Guscode/DKbert-hatespeech-detection"

# Tokenizer published with the checkpoint
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

# Sequence-classification model from the same checkpoint
# (`tf` is this notebook's alias for `transformers`)
model = tf.AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT)

In [7]:
# Read the OffensEval2020 test set (tab-separated file)
test_df = pd.read_csv(
    "Test_Hate.tsv",
    sep="\t",
    quotechar='"',
    # `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0.
    # `on_bad_lines="warn"` still drops malformed rows but reports each one,
    # so rows silently missing from the evaluation do not go unnoticed.
    on_bad_lines="warn",
    engine="python",
    encoding="UTF-8",
)
# Binary label column: 0 where subtask_a is "NOT", 1 for every other value
# (a missing subtask_a therefore also maps to 1).
test_df["label"] = np.where(test_df["subtask_a"] == "NOT", 0, 1)

In [9]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that serves tokenized text as per-example tensors.

    Args:
        encodings: Mapping from feature name to a per-example sequence of
            values. Must contain "input_ids", which defines the dataset length.
        labels: Optional per-example labels, indexable by position (for
            example a list or a NumPy array). When None or empty, the returned
            items carry no "labels" entry.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings  # tokenizer output, keyed by feature name
        self.labels = labels  # optional targets aligned with the encodings

    def __getitem__(self, idx):
        """Return a dict of tensors for example ``idx``.

        The dict has one tensor per key in ``encodings`` plus a "labels"
        tensor when labels were supplied.
        """
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Test for None and length explicitly: the truth value of a
        # multi-element NumPy array or pandas Series is ambiguous and raises
        # ValueError, so `if self.labels:` only works for plain lists.
        if self.labels is not None and len(self.labels) > 0:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples, taken from the "input_ids" feature."""
        return len(self.encodings["input_ids"])

texts = list(test_df.tweet)  # tweets as a plain list of strings
# Tokenize with padding enabled and truncation capped at max_length=128
texts_tokenized = tokenizer(texts, padding=True, truncation=True, max_length=128)

# Wrap the encodings (no labels) in a torch dataset for the Trainer
text_dataset = Dataset(texts_tokenized)

text_trainer = Trainer(model)  # Trainer with default arguments, used only for inference
# predict() returns a (predictions, label_ids, metrics) named tuple; only the
# raw per-class scores are needed here.
raw_pred = text_trainer.predict(text_dataset).predictions

# Binarize: index of the highest score in each row, in one vectorized call
binary_preds = np.argmax(raw_pred, axis=1)
assert len(binary_preds) == len(test_df), "expected one prediction per test row"


In [14]:
# Score the predictions held in `binary_preds` against the gold labels
print(classification_report(test_df.label, binary_preds))  # per-class precision/recall/F1

# Confusion matrix (rows: true label, columns: predicted label), shown as the cell output
confusion_matrix(test_df.label, binary_preds)

              precision    recall  f1-score   support

           0       0.93      0.98      0.95       288
           1       0.77      0.49      0.60        41

    accuracy                           0.92       329
   macro avg       0.85      0.73      0.78       329
weighted avg       0.91      0.92      0.91       329



array([[282,   6],
       [ 21,  20]])