In [87]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, pipeline
from sklearn.metrics import f1_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# HateXplain Dataset

### Content
- **Textual Data**: The dataset contains social media posts from Twitter and Gab.
- **Labels**: Each post is labeled as "normal," "offensive," or "hate," indicating the severity of the language.

### Annotations
- **Label Annotations**: Posts are annotated by multiple human annotators to ensure consistent labeling.
- **Rationales**: Annotators provide explanations highlighting specific parts of the text that influenced their labeling decision.
- **Target Communities**: Annotations include information on which communities or groups are targeted by the hate speech.

In [88]:
dataset = load_dataset("hatexplain")
dataset

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


DatasetDict({
    train: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 15383
    })
    validation: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 1922
    })
    test: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 1924
    })
})

# Preprocessing
- I'll classify both the offensiveness in a ordinal fashion
- and the targets of the sentence

In [89]:
X_train = dataset["train"]["post_tokens"]
X_val = dataset["validation"]["post_tokens"]
X_test = dataset["test"]["post_tokens"]

X_train = [" ".join(sublist) for sublist in X_train]
X_val = [" ".join(sublist) for sublist in X_val]
X_test = [" ".join(sublist) for sublist in X_test]

y_train_lbl = dataset["train"]["annotators"]
y_val_lbl = dataset["validation"]["annotators"]
y_test_lbl = dataset["test"]["annotators"]

y_train_lbl = [el["label"] for el in y_train_lbl]
y_val_lbl = [el["label"] for el in y_val_lbl]
y_test_lbl = [el["label"] for el in y_test_lbl]

y_train_targets = dataset["train"]["annotators"]
y_val_targets = dataset["validation"]["annotators"]
y_test_targets = dataset["test"]["annotators"]

y_train_targets = [el["target"] for el in y_train_targets]
y_val_targets = [el["target"] for el in y_val_targets]
y_test_targets = [el["target"] for el in y_test_targets]


In [90]:
id1 = 140
id2 = 145

for el1, el2, el3 in zip(X_train[id1:id2],y_train_lbl[id1:id2],y_train_targets[id1:id2]):
    print(el1,el2,el3,sep="\n",end="\n\n")

lesson of the week from the holy book the sovereign individual once commerce flows over the internet suppression of speech becomes unstoppable devolution inevitable and the monopoly of violence by governments will end paraphrased from j d davidson rhys mogg
[1, 1, 1]
[['None'], ['None'], ['None']]

watch bbc on muslims abandoning eu <number> syria the father honestly says it bcs they do not like girls being told they have freewill by supposedly educated pple bcs women don t in islam sharia sharialaw realitycheck fgm misogyny hijab rapeculture abuse violence woman women girl girls womansrights womensrights brexit
[1, 1, 1]
[['Islam', 'Women'], ['Arab', 'Islam', 'Women'], ['Arab', 'Islam']]

<user> whatchu are <user> is a self hating homosexual who has to pay for your homosexual desires
[2, 2, 2]
[['Homosexual'], ['Homosexual'], ['Homosexual']]

the parade needed at least one severed head in honor of muslim culture
[0, 2, 0]
[['Islam'], ['Islam'], ['Islam']]

i look at the mudslime news 

I will average and normalize the label values. The actual situation is that for every sentence we have three different annotation which can take the 3 values:
- 0 - hatespeech
- 1 - normal
- 2 - offensive

I'll first reorder them, so that

- 2 = hate
- 1 = offensive
- 0 = normal

than, since for each sentence there are three different annotation, I'll average them and normalize in the range [0, 1]

(e.g., "they playing a lot of ethnic music at this white ass wedding" has labels [2, 1, 1] -> [1, 0, 0] -> 0.333 -> 0.165)

In [91]:
lbl_map = {
    0:2,
    1:0,
    2:1
}

def avg3(ls):
    return (lbl_map[ls[0]]+lbl_map[ls[1]]+lbl_map[ls[2]])/6

y_train_lbl = [avg3(x) for x in y_train_lbl]
y_val_lbl = [avg3(x) for x in y_val_lbl]
y_test_lbl = [avg3(x) for x in y_test_lbl]

I'll also hold a categorical equivalent:
- normal  (0 -> 0.33)
- offensive (0.33 -> 0.66)
- hatespeech  (0.66 -> 1)

In [92]:
def ordinalToCategorical (hateScore):
    if hateScore < 0.33:
        return 0
    elif hateScore < 0.66:
        return 1
    else:
        return 2
    
y_train_lbl_cat = [ordinalToCategorical(i) for i in y_train_lbl]
y_val_lbl_cat = [ordinalToCategorical(i) for i in y_val_lbl]
y_test_lbl_cat = [ordinalToCategorical(i) for i in y_test_lbl]
y_train_lbl_cat

[2,
 2,
 1,
 1,
 1,
 0,
 2,
 0,
 0,
 2,
 1,
 0,
 0,
 0,
 2,
 1,
 2,
 2,
 1,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 0,
 1,
 2,
 0,
 2,
 0,
 0,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 0,
 2,
 1,
 2,
 2,
 1,
 2,
 1,
 2,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 0,
 2,
 2,
 1,
 2,
 2,
 1,
 2,
 2,
 2,
 2,
 0,
 2,
 2,
 1,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 0,
 2,
 0,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 0,
 2,
 1,
 1,
 1,
 0,
 2,
 1,
 1,
 1,
 2,
 0,
 2,
 2,
 1,
 2,
 0,
 2,
 2,
 2,
 1,
 2,
 1,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 0,
 2,
 0,
 0,
 1,
 2,
 2,
 2,
 1,
 1,
 1,
 0,
 1,
 2,
 1,
 2,
 2,
 0,
 1,
 1,
 2,
 0,
 0,
 2,
 0,
 0,
 2,
 1,
 2,
 2,
 2,
 0,
 2,
 0,
 1,
 2,
 2,
 2,
 2,
 0,
 2,
 0,
 0,
 2,
 2,
 2,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 1,
 2,
 2,
 1,
 2,
 0,
 2,
 2,
 0,
 2,
 1,
 0,
 2,
 2,
 2,
 2,
 2,
 2,
 0,
 2,
 0,
 2,
 2,
 0,
 2,
 1,
 0,
 2,
 2,
 1,
 1,
 1,
 2,
 0,
 2,
 2,
 1,
 2,
 1,
 2,
 1,
 0,
 0,
 2,
 0,
 0,
 0,
 1,
 0,
 2,
 2,
 0,
 2,
 1,
 2,
 0,
 1,
 2,
 0,


A similar procedure should be done also for the targets, If two of the three annotators mentioned the same target I'll maintain it

In [93]:
# Done by ChatGPT 4o

# Prompt:
# Suppose you have three lists of classes,
# if the same class is in at least 2 classes
# you add it to a new list to return. give me the code

from collections import Counter

def find_common_classes(list1, list2, list3):
    # Combine all lists into one
    combined_list = list1 + list2 + list3
    
    # Create a counter to count occurrences of each element
    counter = Counter(combined_list)
    
    # Create a list to store elements that appear in at least two of the lists
    result = [item for item, count in counter.items() if count >= 2]
    
    return result

In [94]:
y_train_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_train_targets]
y_val_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_val_targets]
y_test_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_test_targets]

Sample example of the changed dataset

In [95]:
id1 = 140
id2 = 145

for sen, lbl, catlbl, targets in zip(X_train[id1:id2],y_train_lbl[id1:id2],y_train_lbl_cat[id1:id2],y_train_targets[id1:id2]):
    print(sen,lbl,f"Category: {catlbl}",targets,end="\n\n",sep="\n")

lesson of the week from the holy book the sovereign individual once commerce flows over the internet suppression of speech becomes unstoppable devolution inevitable and the monopoly of violence by governments will end paraphrased from j d davidson rhys mogg
0.0
Category: 0
['None']

watch bbc on muslims abandoning eu <number> syria the father honestly says it bcs they do not like girls being told they have freewill by supposedly educated pple bcs women don t in islam sharia sharialaw realitycheck fgm misogyny hijab rapeculture abuse violence woman women girl girls womansrights womensrights brexit
0.0
Category: 0
['Islam', 'Women', 'Arab']

<user> whatchu are <user> is a self hating homosexual who has to pay for your homosexual desires
0.5
Category: 1
['Homosexual']

the parade needed at least one severed head in honor of muslim culture
0.8333333333333334
Category: 2
['Islam']

i look at the mudslime news from europe and think i wish some goat fucker would try that shit down here in lou

In [96]:
classes = set()
for ls in y_train_targets:
    for el in ls:
        classes.add(el)

print(classes)
print(len(classes))

{'Islam', 'Disability', 'Men', 'None', 'Refugee', 'Other', 'Economic', 'Caucasian', 'African', 'Indian', 'Jewish', 'Indigenous', 'Women', 'Homosexual', 'Hindu', 'Christian', 'Buddhism', 'Hispanic', 'Arab', 'Asian'}
20


# BERT MODEL

Since BERT wants a datasetdict I'm recostructing it with the updated data

In [97]:
from datasets import Dataset, DatasetDict
import pandas as pd

train_data = {'text': X_train, 'label': y_train_lbl_cat}
val_data = {'text': X_val, 'label': y_val_lbl_cat}
test_data = {'text': X_test, 'label': y_test_lbl_cat}

df_train = pd.DataFrame(train_data)
df_val = pd.DataFrame(val_data)
df_test = pd.DataFrame(test_data)

train_dataset = Dataset.from_pandas(df_train)
val_dataset = Dataset.from_pandas(df_val)
test_dataset = Dataset.from_pandas(df_test)

hateXplain = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset,
    "test": test_dataset
})

In [100]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

hateXplain_encoded = hateXplain.map(tokenize_function, batched=True, batch_size=None)

print(hateXplain_encoded["train"][0])

Map: 100%|██████████| 15383/15383 [00:01<00:00, 14108.05 examples/s]
Map: 100%|██████████| 1922/1922 [00:00<00:00, 17155.79 examples/s]
Map: 100%|██████████| 1924/1924 [00:00<00:00, 20925.73 examples/s]

{'text': 'u really think i would not have been raped by feral hindu or muslim back in india or bangladesh and a neo nazi would rape me as well just to see me cry', 'label': 2, 'input_ids': [101, 1057, 2428, 2228, 1045, 2052, 2025, 2031, 2042, 15504, 2011, 18993, 7560, 2030, 5152, 2067, 1999, 2634, 2030, 7269, 1998, 1037, 9253, 6394, 2052, 9040, 2033, 2004, 2092, 2074, 2000, 2156, 2033, 5390, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0




In [101]:
print(model_name)
num_labels = 3
id2label = {0: "normal", 1: "offensive", 2: "hatespeech"}
label2id = {"normal": 0, "offensive": 1, "hatespeech": 2}
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, id2label=id2label, label2id=label2id)

distilbert-base-uncased


In [102]:
model

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [103]:
def get_F1(preds):
  preds_preds = preds.predictions[0] if isinstance(preds.predictions, tuple) else preds.predictions
  predictions = preds_preds.argmax(axis=-1)
  labels = preds.label_ids
  accuracy = f1_score(labels, predictions, average='weighted')
  return {'F1 Score': accuracy}

In [104]:
batch_size = 16
epochs = 5
logging_steps = len(hateXplain_encoded["train"]) // batch_size
model_name_output_dir = model_name.replace("/", "-")+"-finetuned-hateXplain"
training_args = TrainingArguments(output_dir=model_name_output_dir,
                                  num_train_epochs=epochs,
                                  learning_rate=1e-4,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  logging_steps=logging_steps,
                                  log_level="error",
                                  optim='adamw_torch'
                                  )

In [105]:
trainer = Trainer(model=model,
                  args=training_args,
                  compute_metrics=get_F1,
                  train_dataset=hateXplain_encoded["train"],
                  eval_dataset=hateXplain_encoded["validation"],
                  tokenizer=tokenizer)
trainer.train()

Epoch,Training Loss,Validation Loss,F1 score
1,0.8077,0.769226,0.628209
2,0.6396,0.746986,0.678203
3,0.4444,0.872729,0.655767
4,0.269,1.216097,0.656839
5,0.1621,1.506326,0.660357


TrainOutput(global_step=4810, training_loss=0.464194241767118, metrics={'train_runtime': 972.0548, 'train_samples_per_second': 79.126, 'train_steps_per_second': 4.948, 'total_flos': 3562139040585570.0, 'train_loss': 0.464194241767118, 'epoch': 5.0})

In [106]:
trainer.save_model()
trainer.evaluate()

{'eval_loss': 1.5063261985778809,
 'eval_F1 Score': 0.6603572345117554,
 'eval_runtime': 4.3841,
 'eval_samples_per_second': 438.399,
 'eval_steps_per_second': 27.6,
 'epoch': 5.0}

# Uploading and testing

In [5]:
from transformers import AutoModelForSequenceClassification, pipeline, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-hateXplain')
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [7]:
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)
print(classifier('Bruce lee worst chinese actor'))
print(classifier('Bruce lee chinese dog'))
print(classifier("Imagine having bruce lee in the USA government"))

[{'label': 'normal', 'score': 0.9897294044494629}]
[{'label': 'hatespeech', 'score': 0.600603461265564}]
[{'label': 'offensive', 'score': 0.9806431531906128}]
