In [26]:
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, pipeline, EvalPrediction
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from transformers import EvalPrediction

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# HateXplain Dataset

### Content
- **Textual Data**: The dataset contains social media posts from Twitter and Gab.
- **Labels**: Each post is labeled as "normal," "offensive," or "hate," indicating the severity of the language.

### Annotations
- **Label Annotations**: Posts are annotated by multiple human annotators to ensure consistent labeling.
- **Rationales**: Annotators provide explanations highlighting specific parts of the text that influenced their labeling decision.
- **Target Communities**: Annotations include information on which communities or groups are targeted by the hate speech.

In [2]:
dataset = load_dataset("hatexplain", trust_remote_code=True)
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 15383
    })
    validation: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 1922
    })
    test: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 1924
    })
})

# Preprocessing
- I'll classify both the offensiveness in a ordinal fashion
- and the targets of the sentence

In [3]:
X_train = dataset["train"]["post_tokens"]
X_val = dataset["validation"]["post_tokens"]
X_test = dataset["test"]["post_tokens"]

X_train = [" ".join(sublist) for sublist in X_train]
X_val = [" ".join(sublist) for sublist in X_val]
X_test = [" ".join(sublist) for sublist in X_test]

y_train_lbl = dataset["train"]["annotators"]
y_val_lbl = dataset["validation"]["annotators"]
y_test_lbl = dataset["test"]["annotators"]

y_train_lbl = [el["label"] for el in y_train_lbl]
y_val_lbl = [el["label"] for el in y_val_lbl]
y_test_lbl = [el["label"] for el in y_test_lbl]

y_train_targets = dataset["train"]["annotators"]
y_val_targets = dataset["validation"]["annotators"]
y_test_targets = dataset["test"]["annotators"]

y_train_targets = [el["target"] for el in y_train_targets]
y_val_targets = [el["target"] for el in y_val_targets]
y_test_targets = [el["target"] for el in y_test_targets]


In [4]:
id1 = 140
id2 = 145

for el1, el2, el3 in zip(X_train[id1:id2],y_train_lbl[id1:id2],y_train_targets[id1:id2]):
    print(el1,el2,el3,sep="\n",end="\n\n")

lesson of the week from the holy book the sovereign individual once commerce flows over the internet suppression of speech becomes unstoppable devolution inevitable and the monopoly of violence by governments will end paraphrased from j d davidson rhys mogg
[1, 1, 1]
[['None'], ['None'], ['None']]

watch bbc on muslims abandoning eu <number> syria the father honestly says it bcs they do not like girls being told they have freewill by supposedly educated pple bcs women don t in islam sharia sharialaw realitycheck fgm misogyny hijab rapeculture abuse violence woman women girl girls womansrights womensrights brexit
[1, 1, 1]
[['Islam', 'Women'], ['Arab', 'Islam', 'Women'], ['Arab', 'Islam']]

<user> whatchu are <user> is a self hating homosexual who has to pay for your homosexual desires
[2, 2, 2]
[['Homosexual'], ['Homosexual'], ['Homosexual']]

the parade needed at least one severed head in honor of muslim culture
[0, 2, 0]
[['Islam'], ['Islam'], ['Islam']]

i look at the mudslime news 

## Preparing Labels

I will average and normalize the label values. The actual situation is that for every sentence we have three different annotation which can take the 3 values:
- 0 - hatespeech
- 1 - normal
- 2 - offensive

I'll first reorder them, so that

- 2 = hate
- 1 = offensive
- 0 = normal

than, since for each sentence there are three different annotation, I'll average them and normalize in the range [0, 1]

(e.g., "they playing a lot of ethnic music at this white ass wedding" has labels [2, 1, 1] -> [1, 0, 0] -> 0.333 -> 0.165)

In [5]:
lbl_map = {
    0:2,
    1:0,
    2:1
}

def avg3(ls):
    return (lbl_map[ls[0]]+lbl_map[ls[1]]+lbl_map[ls[2]])/6

y_train_lbl = [avg3(x) for x in y_train_lbl]
y_val_lbl = [avg3(x) for x in y_val_lbl]
y_test_lbl = [avg3(x) for x in y_test_lbl]

I'll also hold a categorical equivalent:
- normal  (0 -> 0.33)
- offensive (0.33 -> 0.66)
- hatespeech  (0.66 -> 1)

In [6]:
def ordinalToCategorical (hateScore):
    if hateScore < 0.33:
        return 0
    elif hateScore < 0.66:
        return 1
    else:
        return 2
    
y_train_lbl_cat = [ordinalToCategorical(i) for i in y_train_lbl]
y_val_lbl_cat = [ordinalToCategorical(i) for i in y_val_lbl]
y_test_lbl_cat = [ordinalToCategorical(i) for i in y_test_lbl]
y_train_lbl_cat[:20]

[2, 2, 1, 1, 1, 0, 2, 0, 0, 2, 1, 0, 0, 0, 2, 1, 2, 2, 1, 0]

## Preparing Targets

A similar procedure should be done also for the targets, If two of the three annotators mentioned the same target I'll maintain it

In [7]:
# Done by ChatGPT 4o

# Prompt:
# Suppose you have three lists of classes,
# if the same class is in at least 2 classes
# you add it to a new list to return. give me the code

from collections import Counter

def find_common_classes(list1, list2, list3):
    # Combine all lists into one
    combined_list = list1 + list2 + list3
    
    # Create a counter to count occurrences of each element
    counter = Counter(combined_list)
    
    # Create a list to store elements that appear in at least two of the lists
    result = [item for item, count in counter.items() if count >= 2]
    
    return result

In [8]:
y_train_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_train_targets]
y_val_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_val_targets]
y_test_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_test_targets]

all_targets = y_train_targets + y_val_targets + y_test_targets

In [9]:
classes = set()
for ls in all_targets:
    for el in ls:
        classes.add(el)

print(classes)
print(len(classes))

{'Refugee', 'Indigenous', 'Men', 'Caucasian', 'Arab', 'Buddhism', 'Indian', 'Disability', 'Hispanic', 'Women', 'Asian', 'Other', 'Hindu', 'Christian', 'None', 'Homosexual', 'Islam', 'Jewish', 'African', 'Economic'}
20


Removing low incidence groups for tagging

In [10]:
classCounter = dict()

for el in classes:
    classCounter[el] = 0

for ls in all_targets:
    for el in ls:
        classCounter[el] += 1

sorted_classCounter = dict(sorted(classCounter.items(), key=lambda item: item[1], reverse=True))
print("number of posts where class is mentioned",sorted_classCounter,sep="\n",end="\n\n")

consideredClasses = set()

for el in classCounter.items():
    if el[1] > 100:
        consideredClasses.add(el[0])

print("target classes with more than 100 posts",consideredClasses,sep="\n")

number of posts where class is mentioned
{'None': 6514, 'African': 3166, 'Islam': 2111, 'Jewish': 1925, 'Homosexual': 1837, 'Women': 1534, 'Refugee': 848, 'Other': 755, 'Arab': 753, 'Caucasian': 497, 'Asian': 383, 'Hispanic': 357, 'Men': 84, 'Disability': 54, 'Christian': 45, 'Hindu': 17, 'Indian': 10, 'Economic': 9, 'Buddhism': 2, 'Indigenous': 1}

target classes with more than 100 posts
{'Refugee', 'Jewish', 'Homosexual', 'Women', 'African', 'Caucasian', 'Asian', 'Other', 'Hispanic', 'None', 'Arab', 'Islam'}


In [11]:
print(y_train_targets)

for i,ls in enumerate(y_train_targets):
    y_train_targets[i] = [consClass for consClass in ls if consClass in consideredClasses]

for i,ls in enumerate(y_val_targets):
    y_val_targets[i] = [consClass for consClass in ls if consClass in consideredClasses]

for i,ls in enumerate(y_test_targets):
    y_test_targets[i] = [consClass for consClass in ls if consClass in consideredClasses]

print(y_train_targets)

[['Hindu', 'Islam'], ['Refugee', 'Indian'], ['Other', 'Hindu'], ['Hindu'], ['Economic'], ['Hispanic', 'Refugee'], ['Women', 'Disability', 'Islam', 'Homosexual'], ['Hindu', 'Islam'], ['Christian', 'Islam'], ['Refugee'], ['Caucasian', 'Women'], ['Caucasian'], ['Caucasian'], ['Caucasian'], ['Refugee'], ['Caucasian'], ['Refugee'], ['African', 'Disability', 'Homosexual'], ['Refugee'], ['Caucasian'], ['African', 'Other'], ['African', 'Caucasian', 'Jewish'], ['Caucasian'], ['Asian'], ['Refugee'], ['African'], ['African'], ['African'], ['Refugee'], ['Jewish'], ['Hindu', 'Islam'], ['African'], ['African'], ['African'], ['Refugee'], ['Refugee'], ['Asian', 'Arab'], ['African'], ['African', 'Arab', 'Hispanic', 'Jewish'], ['African'], ['Women', 'African'], ['African'], ['African'], ['African'], ['Hindu', 'Islam'], ['Refugee'], ['Homosexual'], ['Homosexual'], ['Homosexual'], ['African', 'Women'], ['Homosexual'], ['African'], ['Arab', 'Hindu', 'Islam'], ['Homosexual'], ['Homosexual'], ['Homosexual'],

This code removes:
- entries where the targets appears less than 100 times
- entries where all the annotators disagree on the target

In [12]:
print(len(X_train),len(y_train_targets))

# By ChatGPT 4o [modified]
#
# i have a list of sentences and a list of targets
# I want to remove each entry if the target ls is empty from both lists, how do I do it?

def removeEmptyTargetsEntries(X,y):

    # Filter out entries where the target is empty
    filtered_pairs = [(s, t) for s, t in zip(X, y) if len(t)!=0]

    # Unzip the filtered pairs back into two separate lists
    filtered_sentences, filtered_targets = zip(*filtered_pairs) if filtered_pairs else ([], [])

    # Convert the tuples back to lists (if needed)
    return list(filtered_sentences),list(filtered_targets)

X_train_targets,y_train_targets = removeEmptyTargetsEntries(X_train,y_train_targets)
X_val_targets,y_val_targets = removeEmptyTargetsEntries(X_val,y_val_targets)
X_test_targets,y_test_targets = removeEmptyTargetsEntries(X_test,y_test_targets)


print(len(X_train_targets),len(y_train_targets))

15383 15383
14538 14538


In [14]:
from sklearn.preprocessing import MultiLabelBinarizer

multilabel = MultiLabelBinarizer()

y_train_targets_bin = multilabel.fit_transform(y_train_targets).astype('float32')
y_val_targets_bin = multilabel.transform(y_val_targets).astype('float32')
y_test_targets_bin = multilabel.transform(y_test_targets).astype('float32')

print(X_train_targets[3],y_train_targets[3])
print(y_train_targets_bin[3])


i live and work with many legal mexican immigrants who are great citizens and trump supporters they have no problem with deporting illegals maga ['Hispanic', 'Refugee']
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0.]


Sample example of the changed dataset

In [15]:
id1 = 140
id2 = 145

for sen, lbl, catlbl, targets, targets_bin in zip(X_train[id1:id2],y_train_lbl[id1:id2],y_train_lbl_cat[id1:id2],y_train_targets[id1:id2],y_train_targets_bin[id1:id2]):
    print(sen,lbl,f"Category: {catlbl}",targets,targets_bin,end="\n\n",sep="\n")

lesson of the week from the holy book the sovereign individual once commerce flows over the internet suppression of speech becomes unstoppable devolution inevitable and the monopoly of violence by governments will end paraphrased from j d davidson rhys mogg
0.0
Category: 0
['Islam']
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]

watch bbc on muslims abandoning eu <number> syria the father honestly says it bcs they do not like girls being told they have freewill by supposedly educated pple bcs women don t in islam sharia sharialaw realitycheck fgm misogyny hijab rapeculture abuse violence woman women girl girls womansrights womensrights brexit
0.0
Category: 0
['Islam', 'Refugee']
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0.]

<user> whatchu are <user> is a self hating homosexual who has to pay for your homosexual desires
0.5
Category: 1
['African', 'Homosexual']
[1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]

the parade needed at least one severed head in honor of muslim culture
0.8333333333333334
Category: 2
[

## Preaparing DatasetDict for Bert Models
* the first one is for classyfing between 0,1,2 labels
* the second one for the target groups

In [67]:
train_data = {'text': X_train, 'label': y_train_lbl_cat}
val_data = {'text': X_val, 'label': y_val_lbl_cat}
test_data = {'text': X_test, 'label': y_test_lbl_cat}

df_train = pd.DataFrame(train_data)
df_val = pd.DataFrame(val_data)
df_test = pd.DataFrame(test_data)

train_dataset = Dataset.from_pandas(df_train)
val_dataset = Dataset.from_pandas(df_val)
test_dataset = Dataset.from_pandas(df_test)

hateXplain = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset,
    "test": test_dataset
})

# BERT MODELS

In [24]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

hateXplain_encoded = hateXplain.map(tokenize_function, batched=True, batch_size=None)

print(hateXplain_encoded["train"][0])

Map: 100%|██████████| 15383/15383 [00:01<00:00, 10988.55 examples/s]
Map: 100%|██████████| 1922/1922 [00:00<00:00, 24932.20 examples/s]
Map: 100%|██████████| 1924/1924 [00:00<00:00, 26815.72 examples/s]

{'text': 'u really think i would not have been raped by feral hindu or muslim back in india or bangladesh and a neo nazi would rape me as well just to see me cry', 'label': 2, 'input_ids': [101, 1057, 2428, 2228, 1045, 2052, 2025, 2031, 2042, 15504, 2011, 18993, 7560, 2030, 5152, 2067, 1999, 2634, 2030, 7269, 1998, 1037, 9253, 6394, 2052, 9040, 2033, 2004, 2092, 2074, 2000, 2156, 2033, 5390, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0




In [48]:
def get_metrics(preds):
  preds_preds = preds.predictions[0] if isinstance(preds.predictions, tuple) else preds.predictions
  predictions = preds_preds.argmax(axis=-1)
  labels = preds.label_ids

  f1 = f1_score(labels, predictions, average='macro')
  accuracy = accuracy_score(labels, predictions)
  
  return {'F1 Score': f1, 'accuracy': accuracy}

## Fine Tuning - {0,1,2} Labels

In [45]:
num_labels = 3
id2label = {0: "normal", 1: "offensive", 2: "hatespeech"}
label2id = {"normal": 0, "offensive": 1, "hatespeech": 2}
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, id2label=id2label, label2id=label2id)

distilbert-base-uncased




In [46]:
model

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [49]:
batch_size = 16
epochs = 2
logging_steps = len(hateXplain_encoded["train"]) // batch_size
model_name_output_dir = model_name.replace("/", "-")+"-finetuned-hateXplain"
training_args_ft = TrainingArguments(output_dir=model_name_output_dir,
                                  num_train_epochs=epochs,
                                  learning_rate=1e-4,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  logging_steps=logging_steps,
                                  log_level="error",
                                  optim='adamw_torch'
                                  )

In [105]:
trainer = Trainer(model=model,
                  args=training_args_ft,
                  compute_metrics=get_metrics,
                  train_dataset=hateXplain_encoded["train"],
                  eval_dataset=hateXplain_encoded["validation"],
                  tokenizer=tokenizer)
trainer.train()

Epoch,Training Loss,Validation Loss,F1 score
1,0.8077,0.769226,0.628209
2,0.6396,0.746986,0.678203
3,0.4444,0.872729,0.655767
4,0.269,1.216097,0.656839
5,0.1621,1.506326,0.660357


TrainOutput(global_step=4810, training_loss=0.464194241767118, metrics={'train_runtime': 972.0548, 'train_samples_per_second': 79.126, 'train_steps_per_second': 4.948, 'total_flos': 3562139040585570.0, 'train_loss': 0.464194241767118, 'epoch': 5.0})

In [106]:
trainer.save_model()
trainer.evaluate()

{'eval_loss': 1.5063261985778809,
 'eval_F1 Score': 0.6603572345117554,
 'eval_runtime': 4.3841,
 'eval_samples_per_second': 438.399,
 'eval_steps_per_second': 27.6,
 'epoch': 5.0}

### Uploading and testing

In [50]:
model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-hateXplain')
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

batch_size = 16
epochs = 2

model_name_output_dir = model_name.replace("/", "-")+"-finetuned-hateXplain"
training_args_ft = TrainingArguments(output_dir=model_name_output_dir,
                                  num_train_epochs=epochs,
                                  learning_rate=1e-4,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  log_level="error",
                                  optim='adamw_torch'
                                  )



In [51]:
model.to(device)
trainer = Trainer(model=model, args=training_args_ft, compute_metrics=get_metrics, tokenizer=tokenizer)
preds_ft = trainer.predict(hateXplain_encoded['test'])
print(preds_ft.metrics)

{'test_loss': 1.5016566514968872, 'test_F1 Score': 0.6319176209758944, 'test_accuracy': 0.6574844074844075, 'test_runtime': 4.0205, 'test_samples_per_second': 478.548, 'test_steps_per_second': 30.096}


In [52]:
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)
print(classifier('Bruce lee worst chinese actor'))
print(classifier('Bruce lee chinese dog'))
print(classifier("Imagine having bruce lee in the USA government"))

[{'label': 'normal', 'score': 0.9897294044494629}]
[{'label': 'hatespeech', 'score': 0.600603461265564}]
[{'label': 'offensive', 'score': 0.9806431531906128}]


## Linear Probing  - {0,1,2} Labels

In [20]:
print(model_name)
num_labels = 3
id2label = {0: "normal", 1: "offensive", 2: "hatespeech"}
label2id = {"normal": 0, "offensive": 1, "hatespeech": 2}
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, id2label=id2label, label2id=label2id)

distilbert-base-uncased


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [21]:
for name, param in model.named_parameters():
    if 'classifier' not in name:
        param.requires_grad = False
    else:  # classifier layer
        print(name)

pre_classifier.weight
pre_classifier.bias
classifier.weight
classifier.bias


In [30]:
batch_size = 16
epochs = 5
model_name_output_dir = model_name.replace("/", "-")+"-linearprob-hateXplain"

training_args_lp = TrainingArguments(output_dir=model_name_output_dir,
                                  num_train_epochs=epochs,
                                  learning_rate=1e-4,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  log_level="error",
                                  optim='adamw_torch'
                                  )

In [24]:
trainer = Trainer(model=model,
                  args=training_args_lp,
                  compute_metrics=get_F1,
                  train_dataset=hateXplain_encoded["train"],
                  eval_dataset=hateXplain_encoded["validation"],
                  tokenizer=tokenizer)
trainer.train()
trainer.save_model()

Epoch,Training Loss,Validation Loss,F1 score
1,0.9954,0.930152,0.513806
2,0.9309,0.92645,0.528405
3,0.9138,0.900136,0.531505
4,0.8989,0.894373,0.547743
5,0.89,0.893638,0.534414


TrainOutput(global_step=4810, training_loss=0.9228046758259161, metrics={'train_runtime': 364.1725, 'train_samples_per_second': 211.205, 'train_steps_per_second': 13.208, 'total_flos': 3562139040585570.0, 'train_loss': 0.9228046758259161, 'epoch': 5.0})

### Uploading and Testing - Linear Prob

In [53]:
model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-linearprob-hateXplain')
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [55]:
trainer = Trainer(model=model, args=training_args_lp, compute_metrics=get_metrics, tokenizer=tokenizer, eval_dataset=hateXplain_encoded["validation"])
trainer.evaluate()

{'eval_loss': 0.8942822813987732,
 'eval_F1 Score': 0.479689145518902,
 'eval_accuracy': 0.5962539021852237,
 'eval_runtime': 4.2076,
 'eval_samples_per_second': 456.792,
 'eval_steps_per_second': 28.757}

In [56]:
model.to(device)
preds_probing = trainer.predict(hateXplain_encoded['test'])
print(preds_probing.metrics)

{'test_loss': 0.9055814146995544, 'test_F1 Score': 0.4776900138139939, 'test_accuracy': 0.5883575883575883, 'test_runtime': 3.9898, 'test_samples_per_second': 482.233, 'test_steps_per_second': 30.328}


In [76]:
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)
print(classifier('Bruce lee worst chinese actor'))
print(classifier('Bruce lee chinese dog'))
print(classifier("Imagine having bruce lee in the USA government"))

[{'label': 'normal', 'score': 0.8035374283790588}]
[{'label': 'hatespeech', 'score': 0.6116581559181213}]
[{'label': 'normal', 'score': 0.45415931940078735}]


# Group Tagging with BERT - only finetuning

## Preprocessing Steps

In [27]:
# Lets build custom dataset
class CustomDataset(Dataset):
  def __init__(self, texts, labels, tokenizer, max_len=128):
    self.texts = texts
    self.labels = labels
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, idx):
    text = str(self.texts[idx])
    label = torch.tensor(self.labels[idx])

    encoding = self.tokenizer(text, truncation=True, padding="max_length", max_length=self.max_len, return_tensors='pt')

    return {
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'labels': label
    }



In [28]:
train_ds_trgt = CustomDataset(X_train_targets, y_train_targets_bin, tokenizer)
val_ds_trgt = CustomDataset(X_val_targets, y_val_targets_bin, tokenizer)
test_ds_trgt = CustomDataset(X_test_targets, y_test_targets_bin, tokenizer)

In [29]:
#CHAT GPT after providing codes errors

def convert_to_hf_dataset(custom_dataset):
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': []
    }
    
    for i in range(len(custom_dataset)):
        item = custom_dataset[i]
        data['input_ids'].append(item['input_ids'].numpy())
        data['attention_mask'].append(item['attention_mask'].numpy())
        data['labels'].append(item['labels'].numpy())
        
    hf_dataset = Dataset.from_dict(data)
    return hf_dataset


train_ds_trgt = convert_to_hf_dataset(train_ds_trgt)
val_ds_trgt = convert_to_hf_dataset(val_ds_trgt)
test_ds_trgt = convert_to_hf_dataset(test_ds_trgt)

In [30]:
# Multi-Label Classification Evaluation Metrics
def multi_labels_metrics(predictions, labels, threshold=0.3):
  sigmoid = torch.nn.Sigmoid()
  probs = sigmoid(torch.Tensor(predictions))

  y_pred = np.zeros(probs.shape)
  y_pred[np.where(probs>=threshold)] = 1
  y_true = labels

  accuracy = accuracy_score(y_true, y_pred)
  f1 = f1_score(y_true, y_pred, average = 'macro')
  roc_auc = roc_auc_score(y_true, y_pred, average = 'macro')

  metrics = {
      "roc_auc": roc_auc,
      "f1": f1,
      "accuracy": accuracy
  }

  return metrics

def compute_metrics(p:EvalPrediction):
  preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions

  result = multi_labels_metrics(predictions=preds,
                                labels=p.label_ids)

  return result

## Training

In [83]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint)
model = DistilBertForSequenceClassification.from_pretrained(checkpoint, num_labels=len(consideredClasses),
                                                            problem_type="multi_label_classification")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [90]:
# Training Arguments
from transformers import TrainingArguments, Trainer

model_name_output_dir = checkpoint.replace("/", "-")+"-targets-ft-hateXplain"

args = TrainingArguments(
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    output_dir = model_name_output_dir,
    num_train_epochs=5,
    save_steps=1000,
    save_total_limit=2,
    evaluation_strategy="epoch"
)

trainer = Trainer(model=model,
                  args=args,
                  train_dataset = train_ds_trgt,
                  eval_dataset = val_ds_trgt,
                  compute_metrics=compute_metrics)

In [91]:
trainer.train()

Epoch,Training Loss,Validation Loss,Roc Auc,F1,Accuracy
1,0.0744,0.148236,0.815824,0.660685,0.605642
2,0.069,0.146872,0.845026,0.68349,0.613385
3,0.0365,0.20355,0.841665,0.676831,0.630531
4,0.0173,0.225633,0.847513,0.684984,0.637721
5,0.0091,0.237129,0.842312,0.676566,0.636062


TrainOutput(global_step=9090, training_loss=0.03889143175811264, metrics={'train_runtime': 852.9423, 'train_samples_per_second': 85.223, 'train_steps_per_second': 10.657, 'total_flos': 2407693103400960.0, 'train_loss': 0.03889143175811264, 'epoch': 5.0})

In [92]:
trainer.evaluate()

{'eval_loss': 0.23712904751300812,
 'eval_roc_auc': 0.8423115948168248,
 'eval_f1': 0.6765660930808567,
 'eval_accuracy': 0.6360619469026548,
 'eval_runtime': 5.5271,
 'eval_samples_per_second': 327.116,
 'eval_steps_per_second': 40.889,
 'epoch': 5.0}

In [93]:
trainer.save_model()

## Uploading and Testing

In [47]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

checkpoint = "distilbert-base-uncased-targets-ft-hateXplain"
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertForSequenceClassification.from_pretrained(checkpoint, num_labels=len(consideredClasses),
                                                            problem_type="multi_label_classification")



In [50]:
args = TrainingArguments(per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    output_dir = checkpoint,
    num_train_epochs=5,
    evaluation_strategy="epoch"
)

trainer = Trainer(model=model,
                  args=args,
                  train_dataset = train_ds_trgt,
                  eval_dataset = val_ds_trgt,
                  compute_metrics=compute_metrics)

trainer.evaluate()

{'eval_loss': 0.23712904751300812,
 'eval_roc_auc': 0.8423115948168248,
 'eval_f1': 0.6765660930808567,
 'eval_accuracy': 0.6360619469026548,
 'eval_runtime': 5.6211,
 'eval_samples_per_second': 321.642,
 'eval_steps_per_second': 40.205}

In [51]:
preds_targets = trainer.predict(test_ds_trgt)
print(preds_targets.metrics)

{'test_loss': 0.22575204074382782, 'test_roc_auc': 0.8437021346529375, 'test_f1': 0.6968215778219594, 'test_accuracy': 0.6524153248195447, 'test_runtime': 5.5641, 'test_samples_per_second': 323.682, 'test_steps_per_second': 40.617}


In [52]:
text = X_test_targets[50]
print(text,y_test_targets[50])

always thought that nigger was a faggot ['African', 'Homosexual']


In [53]:
encoding = tokenizer(text, return_tensors='pt')
encoding.to(trainer.model.device)
outputs = trainer.model(**encoding)

sigmoid = torch.nn.Sigmoid()
probs = sigmoid(outputs.logits[0].cpu())
preds = np.zeros(probs.shape)
preds[np.where(probs>=0.3)] = 1

multilabel.classes_

multilabel.inverse_transform(preds.reshape(1,-1))

[('African', 'Homosexual')]