In [2]:
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, pipeline, EvalPrediction
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from transformers import EvalPrediction

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

2024-06-18 19:15:04.273036: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-18 19:15:04.273132: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-18 19:15:04.409019: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# HateXplain Dataset

### Content
- **Textual Data**: The dataset contains social media posts from Twitter and Gab.
- **Labels**: Each post is labeled as "normal," "offensive," or "hate," indicating the severity of the language.

### Annotations
- **Label Annotations**: Posts are annotated by multiple human annotators to ensure consistent labeling.
- **Rationales**: Annotators provide explanations highlighting specific parts of the text that influenced their labeling decision.
- **Target Communities**: Annotations include information on which communities or groups are targeted by the hate speech.

In [3]:
dataset = load_dataset("hatexplain", trust_remote_code=True)
dataset

Downloading builder script:   0%|          | 0.00/4.78k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.03M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/145k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/15383 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1922 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1924 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 15383
    })
    validation: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 1922
    })
    test: Dataset({
        features: ['id', 'annotators', 'rationales', 'post_tokens'],
        num_rows: 1924
    })
})

# Preprocessing
- I'll classify both the offensiveness in a ordinal fashion
- and the targets of the sentence

In [4]:
X_train = dataset["train"]["post_tokens"]
X_val = dataset["validation"]["post_tokens"]
X_test = dataset["test"]["post_tokens"]

X_train = [" ".join(sublist) for sublist in X_train]
X_val = [" ".join(sublist) for sublist in X_val]
X_test = [" ".join(sublist) for sublist in X_test]

y_train_lbl = dataset["train"]["annotators"]
y_val_lbl = dataset["validation"]["annotators"]
y_test_lbl = dataset["test"]["annotators"]

y_train_lbl = [el["label"] for el in y_train_lbl]
y_val_lbl = [el["label"] for el in y_val_lbl]
y_test_lbl = [el["label"] for el in y_test_lbl]

y_train_targets = dataset["train"]["annotators"]
y_val_targets = dataset["validation"]["annotators"]
y_test_targets = dataset["test"]["annotators"]

y_train_targets = [el["target"] for el in y_train_targets]
y_val_targets = [el["target"] for el in y_val_targets]
y_test_targets = [el["target"] for el in y_test_targets]


In [5]:
id1 = 140
id2 = 145

for el1, el2, el3 in zip(X_train[id1:id2],y_train_lbl[id1:id2],y_train_targets[id1:id2]):
    print(el1,el2,el3,sep="\n",end="\n\n")

lesson of the week from the holy book the sovereign individual once commerce flows over the internet suppression of speech becomes unstoppable devolution inevitable and the monopoly of violence by governments will end paraphrased from j d davidson rhys mogg
[1, 1, 1]
[['None'], ['None'], ['None']]

watch bbc on muslims abandoning eu <number> syria the father honestly says it bcs they do not like girls being told they have freewill by supposedly educated pple bcs women don t in islam sharia sharialaw realitycheck fgm misogyny hijab rapeculture abuse violence woman women girl girls womansrights womensrights brexit
[1, 1, 1]
[['Islam', 'Women'], ['Arab', 'Islam', 'Women'], ['Arab', 'Islam']]

<user> whatchu are <user> is a self hating homosexual who has to pay for your homosexual desires
[2, 2, 2]
[['Homosexual'], ['Homosexual'], ['Homosexual']]

the parade needed at least one severed head in honor of muslim culture
[0, 2, 0]
[['Islam'], ['Islam'], ['Islam']]

i look at the mudslime news 

## Preparing Labels

I will average and normalize the label values. The actual situation is that for every sentence we have three different annotation which can take the 3 values:
- 0 - hatespeech
- 1 - normal
- 2 - offensive

I'll first reorder them, so that

- 2 = hate
- 1 = offensive
- 0 = normal

than, since for each sentence there are three different annotation, I'll average them and normalize in the range [0, 1]

(e.g., "they playing a lot of ethnic music at this white ass wedding" has labels [2, 1, 1] -> [1, 0, 0] -> 0.333 -> 0.165)

In [6]:
lbl_map = {
    0:2,
    1:0,
    2:1
}

def avg3(ls):
    return (lbl_map[ls[0]]+lbl_map[ls[1]]+lbl_map[ls[2]])/6

y_train_lbl = [avg3(x) for x in y_train_lbl]
y_val_lbl = [avg3(x) for x in y_val_lbl]
y_test_lbl = [avg3(x) for x in y_test_lbl]

I'll also hold a categorical equivalent:
- normal  (0 -> 0.33)
- offensive (0.33 -> 0.66)
- hatespeech  (0.66 -> 1)

In [7]:
def ordinalToCategorical (hateScore):
    if hateScore < 0.33:
        return 0
    elif hateScore < 0.66:
        return 1
    else:
        return 2
    
y_train_lbl_cat = [ordinalToCategorical(i) for i in y_train_lbl]
y_val_lbl_cat = [ordinalToCategorical(i) for i in y_val_lbl]
y_test_lbl_cat = [ordinalToCategorical(i) for i in y_test_lbl]
y_train_lbl_cat[:20]

[2, 2, 1, 1, 1, 0, 2, 0, 0, 2, 1, 0, 0, 0, 2, 1, 2, 2, 1, 0]

## Preparing Targets

A similar procedure should be done also for the targets, If two of the three annotators mentioned the same target I'll maintain it

In [8]:
# Done by ChatGPT 4o

# Prompt:
# Suppose you have three lists of classes,
# if the same class is in at least 2 classes
# you add it to a new list to return. give me the code

from collections import Counter

def find_common_classes(list1, list2, list3):
    # Combine all lists into one
    combined_list = list1 + list2 + list3
    
    # Create a counter to count occurrences of each element
    counter = Counter(combined_list)
    
    # Create a list to store elements that appear in at least two of the lists
    result = [item for item, count in counter.items() if count >= 2]
    
    return result

In [9]:
y_train_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_train_targets]
y_val_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_val_targets]
y_test_targets = [find_common_classes(trgts[0],trgts[1],trgts[2]) for trgts in y_test_targets]

all_targets = y_train_targets + y_val_targets + y_test_targets

In [10]:
classes = set()
for ls in all_targets:
    for el in ls:
        classes.add(el)

print(classes)
print(len(classes))

{'Indigenous', 'Jewish', 'Hindu', 'Indian', 'Men', 'Homosexual', 'Other', 'Hispanic', 'Disability', 'Buddhism', 'Asian', 'African', 'Christian', 'Caucasian', 'Arab', 'Women', 'Islam', 'None', 'Refugee', 'Economic'}
20


Removing low incidence groups for tagging

In [11]:
classCounter = dict()

for el in classes:
    classCounter[el] = 0

for ls in all_targets:
    for el in ls:
        classCounter[el] += 1

sorted_classCounter = dict(sorted(classCounter.items(), key=lambda item: item[1], reverse=True))
print("number of posts where class is mentioned",sorted_classCounter,sep="\n",end="\n\n")

consideredClasses = set()

for el in classCounter.items():
    if el[1] > 100:
        consideredClasses.add(el[0])

print("target classes with more than 100 posts",consideredClasses,sep="\n")

number of posts where class is mentioned
{'None': 6514, 'African': 3166, 'Islam': 2111, 'Jewish': 1925, 'Homosexual': 1837, 'Women': 1534, 'Refugee': 848, 'Other': 755, 'Arab': 753, 'Caucasian': 497, 'Asian': 383, 'Hispanic': 357, 'Men': 84, 'Disability': 54, 'Christian': 45, 'Hindu': 17, 'Indian': 10, 'Economic': 9, 'Buddhism': 2, 'Indigenous': 1}

target classes with more than 100 posts
{'Arab', 'Hispanic', 'Women', 'Asian', 'Caucasian', 'African', 'Jewish', 'Islam', 'Homosexual', 'None', 'Refugee', 'Other'}


In [12]:


for i,ls in enumerate(y_train_targets):
    y_train_targets[i] = [consClass for consClass in ls if consClass in consideredClasses]

for i,ls in enumerate(y_val_targets):
    y_val_targets[i] = [consClass for consClass in ls if consClass in consideredClasses]

for i,ls in enumerate(y_test_targets):
    y_test_targets[i] = [consClass for consClass in ls if consClass in consideredClasses]



This code removes:
- entries where the targets appears less than 100 times
- entries where all the annotators disagree on the target

In [13]:
print(len(X_train),len(y_train_targets))

# By ChatGPT 4o [modified]
#
# i have a list of sentences and a list of targets
# I want to remove each entry if the target ls is empty from both lists, how do I do it?

def removeEmptyTargetsEntries(X,y):

    # Filter out entries where the target is empty
    filtered_pairs = [(s, t) for s, t in zip(X, y) if len(t)!=0]

    # Unzip the filtered pairs back into two separate lists
    filtered_sentences, filtered_targets = zip(*filtered_pairs) if filtered_pairs else ([], [])

    # Convert the tuples back to lists (if needed)
    return list(filtered_sentences),list(filtered_targets)

X_train_targets,y_train_targets = removeEmptyTargetsEntries(X_train,y_train_targets)
X_val_targets,y_val_targets = removeEmptyTargetsEntries(X_val,y_val_targets)
X_test_targets,y_test_targets = removeEmptyTargetsEntries(X_test,y_test_targets)


print(len(X_train_targets),len(y_train_targets))

15383 15383
14538 14538


In [14]:
from sklearn.preprocessing import MultiLabelBinarizer

multilabel = MultiLabelBinarizer()

y_train_targets_bin = multilabel.fit_transform(y_train_targets).astype('float32')
y_val_targets_bin = multilabel.transform(y_val_targets).astype('float32')
y_test_targets_bin = multilabel.transform(y_test_targets).astype('float32')

print(X_train_targets[3],y_train_targets[3])
print(y_train_targets_bin[3])


i live and work with many legal mexican immigrants who are great citizens and trump supporters they have no problem with deporting illegals maga ['Hispanic', 'Refugee']
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0.]


Sample example of the changed dataset

In [15]:
id1 = 140
id2 = 145

for sen, lbl, catlbl, targets, targets_bin in zip(X_train[id1:id2],y_train_lbl[id1:id2],y_train_lbl_cat[id1:id2],y_train_targets[id1:id2],y_train_targets_bin[id1:id2]):
    print(sen,lbl,f"Category: {catlbl}",targets,targets_bin,end="\n\n",sep="\n")

lesson of the week from the holy book the sovereign individual once commerce flows over the internet suppression of speech becomes unstoppable devolution inevitable and the monopoly of violence by governments will end paraphrased from j d davidson rhys mogg
0.0
Category: 0
['Islam']
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]

watch bbc on muslims abandoning eu <number> syria the father honestly says it bcs they do not like girls being told they have freewill by supposedly educated pple bcs women don t in islam sharia sharialaw realitycheck fgm misogyny hijab rapeculture abuse violence woman women girl girls womansrights womensrights brexit
0.0
Category: 0
['Islam', 'Refugee']
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0.]

<user> whatchu are <user> is a self hating homosexual who has to pay for your homosexual desires
0.5
Category: 1
['African', 'Homosexual']
[1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]

the parade needed at least one severed head in honor of muslim culture
0.8333333333333334
Category: 2
[

## Preaparing DatasetDict for Bert Models
* the first one is for classyfing between 0,1,2 labels
* the second one for the target groups

In [16]:
train_data = {'text': X_train, 'label': y_train_lbl_cat}
val_data = {'text': X_val, 'label': y_val_lbl_cat}
test_data = {'text': X_test, 'label': y_test_lbl_cat}

df_train = pd.DataFrame(train_data)
df_val = pd.DataFrame(val_data)
df_test = pd.DataFrame(test_data)

train_dataset = Dataset.from_pandas(df_train)
val_dataset = Dataset.from_pandas(df_val)
test_dataset = Dataset.from_pandas(df_test)

hateXplain = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset,
    "test": test_dataset
})

# BERT MODELS

In [None]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

hateXplain_encoded = hateXplain.map(tokenize_function, batched=True, batch_size=None)

print(hateXplain_encoded["train"][0])

In [None]:
def get_metrics(preds):
  preds_preds = preds.predictions[0] if isinstance(preds.predictions, tuple) else preds.predictions
  predictions = preds_preds.argmax(axis=-1)
  labels = preds.label_ids

  f1 = f1_score(labels, predictions, average='macro')
  accuracy = accuracy_score(labels, predictions)
  
  return {'F1 Score': f1, 'accuracy': accuracy}

## Fine Tuning - {0,1,2} Labels

In [None]:
num_labels = 3
id2label = {0: "normal", 1: "offensive", 2: "hatespeech"}
label2id = {"normal": 0, "offensive": 1, "hatespeech": 2}
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, id2label=id2label, label2id=label2id)

In [None]:
model

In [None]:
batch_size = 16
epochs = 2
logging_steps = len(hateXplain_encoded["train"]) // batch_size
model_name_output_dir = model_name.replace("/", "-")+"-finetuned-hateXplain"
training_args_ft = TrainingArguments(output_dir=model_name_output_dir,
                                  num_train_epochs=epochs,
                                  learning_rate=1e-4,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  logging_steps=logging_steps,
                                  log_level="error",
                                  optim='adamw_torch'
                                  )

In [None]:
trainer = Trainer(model=model,
                  args=training_args_ft,
                  compute_metrics=get_metrics,
                  train_dataset=hateXplain_encoded["train"],
                  eval_dataset=hateXplain_encoded["validation"],
                  tokenizer=tokenizer)
trainer.train()

In [None]:
trainer.save_model()
trainer.evaluate()

### Uploading and testing

In [None]:
model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-hateXplain')
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

batch_size = 16
epochs = 2

model_name_output_dir = model_name.replace("/", "-")+"-finetuned-hateXplain"
training_args_ft = TrainingArguments(output_dir=model_name_output_dir,
                                  num_train_epochs=epochs,
                                  learning_rate=1e-4,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  log_level="error",
                                  optim='adamw_torch'
                                  )

In [None]:
model.to(device)
trainer = Trainer(model=model, args=training_args_ft, compute_metrics=get_metrics, tokenizer=tokenizer)
preds_ft = trainer.predict(hateXplain_encoded['test'])
print(preds_ft.metrics)

In [None]:
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)
print(classifier('Bruce lee worst chinese actor'))
print(classifier('Bruce lee chinese dog'))
print(classifier("Imagine having bruce lee in the USA government"))

## Linear Probing  - {0,1,2} Labels

In [None]:
print(model_name)
num_labels = 3
id2label = {0: "normal", 1: "offensive", 2: "hatespeech"}
label2id = {"normal": 0, "offensive": 1, "hatespeech": 2}
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, id2label=id2label, label2id=label2id)

In [None]:
for name, param in model.named_parameters():
    if 'classifier' not in name:
        param.requires_grad = False
    else:  # classifier layer
        print(name)

In [None]:
batch_size = 16
epochs = 5
model_name_output_dir = model_name.replace("/", "-")+"-linearprob-hateXplain"

training_args_lp = TrainingArguments(output_dir=model_name_output_dir,
                                  num_train_epochs=epochs,
                                  learning_rate=1e-4,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  log_level="error",
                                  optim='adamw_torch'
                                  )

In [None]:
trainer = Trainer(model=model,
                  args=training_args_lp,
                  compute_metrics=get_F1,
                  train_dataset=hateXplain_encoded["train"],
                  eval_dataset=hateXplain_encoded["validation"],
                  tokenizer=tokenizer)
trainer.train()
trainer.save_model()

### Uploading and Testing - Linear Prob

In [None]:
model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-linearprob-hateXplain')
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
trainer = Trainer(model=model, args=training_args_lp, compute_metrics=get_metrics, tokenizer=tokenizer, eval_dataset=hateXplain_encoded["validation"])
trainer.evaluate()

In [None]:
model.to(device)
preds_probing = trainer.predict(hateXplain_encoded['test'])
print(preds_probing.metrics)

In [None]:
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)
print(classifier('Bruce lee worst chinese actor'))
print(classifier('Bruce lee chinese dog'))
print(classifier("Imagine having bruce lee in the USA government"))

# Group Tagging with BERT - only finetuning

## Preprocessing Steps

In [None]:
# Lets build custom dataset
class CustomDataset(Dataset):
  def __init__(self, texts, labels, tokenizer, max_len=128):
    self.texts = texts
    self.labels = labels
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, idx):
    text = str(self.texts[idx])
    label = torch.tensor(self.labels[idx])

    encoding = self.tokenizer(text, truncation=True, padding="max_length", max_length=self.max_len, return_tensors='pt')

    return {
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'labels': label
    }



In [None]:
train_ds_trgt = CustomDataset(X_train_targets, y_train_targets_bin, tokenizer)
val_ds_trgt = CustomDataset(X_val_targets, y_val_targets_bin, tokenizer)
test_ds_trgt = CustomDataset(X_test_targets, y_test_targets_bin, tokenizer)

In [None]:
#CHAT GPT after providing codes errors

def convert_to_hf_dataset(custom_dataset):
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': []
    }
    
    for i in range(len(custom_dataset)):
        item = custom_dataset[i]
        data['input_ids'].append(item['input_ids'].numpy())
        data['attention_mask'].append(item['attention_mask'].numpy())
        data['labels'].append(item['labels'].numpy())
        
    hf_dataset = Dataset.from_dict(data)
    return hf_dataset


train_ds_trgt = convert_to_hf_dataset(train_ds_trgt)
val_ds_trgt = convert_to_hf_dataset(val_ds_trgt)
test_ds_trgt = convert_to_hf_dataset(test_ds_trgt)

In [None]:
# Multi-Label Classification Evaluation Metrics
def multi_labels_metrics(predictions, labels, threshold=0.3):
  sigmoid = torch.nn.Sigmoid()
  probs = sigmoid(torch.Tensor(predictions))

  y_pred = np.zeros(probs.shape)
  y_pred[np.where(probs>=threshold)] = 1
  y_true = labels

  accuracy = accuracy_score(y_true, y_pred)
  f1 = f1_score(y_true, y_pred, average = 'macro')
  roc_auc = roc_auc_score(y_true, y_pred, average = 'macro')

  metrics = {
      "roc_auc": roc_auc,
      "f1": f1,
      "accuracy": accuracy
  }

  return metrics

def compute_metrics(p:EvalPrediction):
  preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions

  result = multi_labels_metrics(predictions=preds,
                                labels=p.label_ids)

  return result

## Training

In [None]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint)
model = DistilBertForSequenceClassification.from_pretrained(checkpoint, num_labels=len(consideredClasses),
                                                            problem_type="multi_label_classification")

In [None]:
# Training Arguments
from transformers import TrainingArguments, Trainer

model_name_output_dir = checkpoint.replace("/", "-")+"-targets-ft-hateXplain"

args = TrainingArguments(
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    output_dir = model_name_output_dir,
    num_train_epochs=5,
    save_steps=1000,
    save_total_limit=2,
    evaluation_strategy="epoch"
)

trainer = Trainer(model=model,
                  args=args,
                  train_dataset = train_ds_trgt,
                  eval_dataset = val_ds_trgt,
                  compute_metrics=compute_metrics)

In [None]:
trainer.train()

In [None]:
trainer.evaluate()

In [None]:
trainer.save_model()

## Uploading and Testing

In [None]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

checkpoint = "distilbert-base-uncased-targets-ft-hateXplain"
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertForSequenceClassification.from_pretrained(checkpoint, num_labels=len(consideredClasses),
                                                            problem_type="multi_label_classification")

In [None]:
args = TrainingArguments(per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    output_dir = checkpoint,
    num_train_epochs=5,
    evaluation_strategy="epoch"
)

trainer = Trainer(model=model,
                  args=args,
                  train_dataset = train_ds_trgt,
                  eval_dataset = val_ds_trgt,
                  compute_metrics=compute_metrics)

trainer.evaluate()

In [None]:
preds_targets = trainer.predict(test_ds_trgt)
print(preds_targets.metrics)

In [None]:
text = X_test_targets[50]
print(text,y_test_targets[50])

In [None]:
encoding = tokenizer(text, return_tensors='pt')
encoding.to(trainer.model.device)
outputs = trainer.model(**encoding)

sigmoid = torch.nn.Sigmoid()
probs = sigmoid(outputs.logits[0].cpu())
preds = np.zeros(probs.shape)
preds[np.where(probs>=0.3)] = 1

multilabel.classes_

multilabel.inverse_transform(preds.reshape(1,-1))

# BiRNN and simple TL-IDF linear regression

Use simple nltk tokenization

In [17]:
#let's create our tokenizer function to tokenize the sentences
import nltk
nltk.download('punkt')
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.tokenize import word_tokenize
import string
punctuations = list(string.punctuation)

stopwords_list = list(stopwords.words('english'))


def nltk_tokenizer(sentence):
    #we lowercase all sentences
    sentence = sentence.lower()

    #here we tokenize it using nltk
    my_tokenized_tokens = word_tokenize(sentence)

    # Removing stop words and punctuations
    mytokens = [word for word in my_tokenized_tokens if word not in stopwords_list and word not in punctuations]

    # return preprocessed list of tokens
    return mytokens

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [18]:
#tokenize train/test data
print("apply tokenizer to test data")
df_test['tokens'] = df_test['text'].apply(nltk_tokenizer)
df_test['sentence'] =  df_test.tokens.apply(lambda x: ' '.join(x))


print("apply tokenizer to train data")
df_train['tokens'] = df_train['text'].apply(nltk_tokenizer)
df_train['sentence'] =  df_train.tokens.apply(lambda x: ' '.join(x))

apply tokenizer to test data
apply tokenizer to train data


In [19]:
#first we need to define the vocabulary using the training data only!
vocab = set()
for sent in df_train['sentence']:
    for word in sent.split(" "):
        vocab.add(word.strip())

#print(vocab)
print(len(vocab))

24815


In [20]:
documents_train = list(df_train.sentence)
documents_test = list(df_test.sentence)

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#tfidf for the training data
vectorizer = CountVectorizer(lowercase=True, vocabulary=vocab)
X_count_train = vectorizer.fit_transform(documents_train)
transformer = TfidfTransformer()
X_tfidf_train = transformer.fit_transform(X_count_train)

#tfidf for the testing data
vectorizer = CountVectorizer(lowercase=True, vocabulary=vocab)
X_count_test = vectorizer.fit_transform(documents_test)
transformer = TfidfTransformer()
X_tfidf_test = transformer.fit_transform(X_count_test)

In [21]:
Y_label_train = df_train['label'].to_list()

from sklearn.linear_model import LogisticRegression
from sklearn import metrics

classifier = LogisticRegression(max_iter = 1000)
classifier.fit(X_tfidf_train, Y_label_train)


y_test = df_test['label'].to_list()
predicted = classifier.predict(X_tfidf_test)
predicted_prob = classifier.predict_proba(X_tfidf_test)
print("Logistic Regression Accuracy:", metrics.accuracy_score(y_test, predicted))
print("Logistic F1 macro:", metrics.f1_score(y_test, predicted, average = "macro"))
print("Logistic ROC AUC :", metrics.roc_auc_score(y_test, predicted_prob, multi_class='ovr'))

Logistic Regression Accuracy: 0.6528066528066528
Logistic Regression Precision: 0.6022145525496255
Logistic Regression Recall: 0.7977744029650685


## Pretrained Glove embedding and BiRNN for target and class prediction

In [22]:
import gensim.downloader
pretraines_glove_model = gensim.downloader.load('glove-wiki-gigaword-100')



IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Now create the embeddings

In [23]:
pretraines_glove_model.most_similar('terrorist')

[('terror', 0.8976657390594482),
 ('terrorists', 0.8589985966682434),
 ('terrorism', 0.8219908475875854),
 ('attacks', 0.8140439391136169),
 ('qaida', 0.7818638682365417),
 ('qaeda', 0.7712634205818176),
 ('bombings', 0.7330332398414612),
 ('extremist', 0.7313344478607178),
 ('militant', 0.7306753396987915),
 ('suspected', 0.7263491153717041)]

In [23]:
import numpy as np

# set to collect unkown words for whic the embeddings is a null vector
unkown_words = set()

    
def get_word_embedding(emb_model,word, emb_dim):

  if word in emb_model: #wv
        return emb_model[word]
  else:
        global unkown_words
        unkown_words.add(word)

        return np.zeros(emb_dim)  # For unknown words


def pad_sequence(embeddings, max_length, embedding_dim):
    if len(embeddings) < max_length:
        padding = np.zeros((max_length - len(embeddings), embedding_dim))
        embeddings = np.vstack((embeddings, padding))
    else:
        embeddings = embeddings[:max_length]
    return embeddings


def create_embedding_vectors(emb_model, df_train, df_test, emb_dim):
    
    global unkown_words
    
    # decide padding dim according to longest sentence in df_train
    max_length = max(len(sentence) for sentence in df_train['tokens'])
    print(f"max lenght: {max_length}")
    
    
    embedded_sentences = []
    
    for sentence in df_train['tokens']:
        words = [word for word in sentence]

        embeddings = [get_word_embedding(emb_model,word,emb_dim) for word in words]
        padded_embeddings = pad_sequence(embeddings, max_length, emb_dim)
        embedded_sentences.append(padded_embeddings)

    embedded_sentences_test = []
    for sentence in df_test['tokens']:
      words = [word for word in sentence]

      embeddings = [get_word_embedding(emb_model,word,emb_dim) for word in words]
      padded_embeddings = pad_sequence(embeddings, max_length, emb_dim)
      embedded_sentences_test.append(padded_embeddings)


    print(f" words not found: {len(unkown_words)}")
    
    
    # Organize data
    data_test = list(zip(np.array(embedded_sentences_test), df_test["label"].to_numpy()))
    data_train = list(zip(np.array(embedded_sentences), df_train["label"].to_numpy()))
    
    return data_train, data_test




## Now we create the data tuple (X,label) for target and hate speech classification


In [26]:
train_target_data = {'text':X_train_targets, 'label':y_train_targets_bin.tolist() }
test_target_data = {'text':X_test_targets, 'label':y_test_targets_bin.tolist() }

df_train_target = pd.DataFrame(train_target_data)
df_test_target  = pd.DataFrame(test_target_data )


df_train_target['tokens'] = df_train_target['text'].apply(nltk_tokenizer)
df_train_target['sentence'] =  df_train_target.tokens.apply(lambda x: ' '.join(x))

df_test_target['tokens'] = df_test_target['text'].apply(nltk_tokenizer)
df_test_target['sentence'] =  df_test_target.tokens.apply(lambda x: ' '.join(x))

data_train, data_test = create_embedding_vectors(emb_model = pretraines_glove_model, df_train = df_train, df_test = df_test, emb_dim = pretraines_glove_model.vector_size)

data_train_targets, data_test_targets = create_embedding_vectors(emb_model = pretraines_glove_model, df_train = df_train_target, df_test = df_test_target, emb_dim = pretraines_glove_model.vector_size)

max lenght: 144
 words not found: 5171
max lenght: 144
 words not found: 5171


Now we will create a custom dataset to train the birnn pytorch model for text classification

In [27]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class GloveDataset(Dataset):
    def __init__(self, data):
        self.dataset = data

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        embeddings, label = self.dataset[idx]
        embeddings = torch.tensor(embeddings, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.long)
        embeddings = embeddings.to(device)
        label = label.to(device)
        return embeddings, label

class GloveDataset_target(Dataset):
    def __init__(self, data):
        self.dataset = data

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        embeddings, label = self.dataset[idx]
        embeddings = torch.tensor(embeddings, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.float32)
        embeddings = embeddings.to(device)
        label = label.to(device)
        return embeddings, label



batch_size = 16

# hate_speech classification dataloaders
dataset = GloveDataset(data_train)
dataloader_glove_train = DataLoader(dataset, batch_size=batch_size, shuffle=True)

test_dataset = GloveDataset(data_test)
dataloader_glove_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# target classification dataloaders

dataset_target = GloveDataset_target(data_train_targets)
dataloader_glove_target_train = DataLoader(dataset_target, batch_size=batch_size, shuffle=True)

test_dataset_target = GloveDataset_target(data_test_targets)
dataloader_glove_target_test = DataLoader(test_dataset_target, batch_size=batch_size, shuffle=True)

### Generic readapted BiRNN model
source: https://github.com/hate-alert/HateXplain/blob/master/Models/otherModels.py

In [30]:
from torch import nn

class BiRNN(nn.Module):  
    def __init__(self,args):
        super(BiRNN, self).__init__()
        
        self.hidden_size = args['hidden_size']
        self.batch_size = args['batch_size']
        self.drop_embed=args['drop_embed']
        self.drop_fc=args['drop_fc']
        self.drop_hidden=args['drop_hidden']
        self.seq_model_name=args["seq_model"]
        self.embedsize=args["embed_size"]
        self.num_layers = args["num_layers"]
  
       

        
        if(args["seq_model"]=="lstm"):
            self.seq_model = nn.LSTM(args["embed_size"], self.hidden_size,num_layers =self.num_layers, bidirectional=True, batch_first=True,dropout=self.drop_hidden)
        elif(args["seq_model"]=="gru"):
            self.seq_model = nn.GRU(args["embed_size"], self.hidden_size, num_layers=self.num_layers, bidirectional=True, batch_first=True,dropout=self.drop_hidden) 
            
        self.linear1 = nn.Linear(2 * self.hidden_size*self.num_layers, self.hidden_size)
        self.linear2 = nn.Linear(self.hidden_size, args['num_classes'])
        self.dropout_embed = nn.Dropout2d(self.drop_embed)
        self.dropout_fc = nn.Dropout(self.drop_fc)
        self.num_labels=args['num_classes']
        
        
        
    def forward(self,X):
        batch_size = X.size(0)
        h_embedding = torch.squeeze(self.dropout_embed(torch.unsqueeze(X, 0))).view(batch_size, X.shape[1], self.embedsize)
        
        # Forward propagate through LSTM/GRU
        if self.seq_model_name == "lstm":
            _, hidden = self.seq_model(h_embedding)
            hidden = hidden[0]
        else:
            _, hidden = self.seq_model(h_embedding)

       
     
        hidden = hidden.transpose(0, 1).contiguous().view(X.size(0), -1) 
        hidden = self.dropout_fc(hidden)
        hidden = torch.relu(self.linear1(hidden))  #batch x hidden_size
        hidden = self.dropout_fc(hidden)
        logits = self.linear2(hidden)
        
        return (logits)
    
    
    
    def init_hidden(self, batch_size):
        return cuda_available(torch.zeros(2, self.batch_size, self.hidden_size))
    



In [31]:
# model for hatespeech classification
args_dict_classification_hate = {
        "batch_size":16,
        "hidden_size":320,
        "embed_size":100,
        "num_classes" : 3,
        "num_layers":3,
        "drop":0.1,
        "learning_rate":0.001,
        "seq_model":"lstm",
        "drop_embed":0.1,
        "drop_fc":0.1,
        "drop_hidden":0.1,
        }
    
BiRNN_for_hate_class = BiRNN(args_dict_classification_hate)
BiRNN_for_hate_class.to(device)

BiRNN(
  (seq_model): LSTM(100, 320, num_layers=3, batch_first=True, dropout=0.1, bidirectional=True)
  (linear1): Linear(in_features=1920, out_features=320, bias=True)
  (linear2): Linear(in_features=320, out_features=3, bias=True)
  (dropout_embed): Dropout2d(p=0.1, inplace=False)
  (dropout_fc): Dropout(p=0.1, inplace=False)
)

12

In [39]:
#model for target classification 

n_classes = len(y_train_targets_bin[0])
args_dict_classification_target = {
        "batch_size":16,
        "hidden_size":320,
        "embed_size":100,
        "num_classes" : n_classes ,
        "num_layers":3,
        "drop":0.1,
        "learning_rate":0.001,
        "seq_model":"lstm",
        "drop_embed":0.1,
        "drop_fc":0.1,
        "drop_hidden":0.1,
        }
    
BiRNN_for_target_class = BiRNN(args_dict_classification_target)
BiRNN_for_target_class.to(device)

BiRNN(
  (seq_model): LSTM(100, 320, num_layers=3, batch_first=True, dropout=0.1, bidirectional=True)
  (linear1): Linear(in_features=1920, out_features=320, bias=True)
  (linear2): Linear(in_features=320, out_features=12, bias=True)
  (dropout_embed): Dropout2d(p=0.1, inplace=False)
  (dropout_fc): Dropout(p=0.1, inplace=False)
)

In [40]:

from torch import nn
from sklearn import metrics
import torch.nn.functional as F

def calculate_metrics(preds, labels, task):
    # preds are softmax if task is hate_speech otherwise sigmoid
    if task == 'hate_speech':
        preds_label = np.argmax(preds, axis=1)
    elif task == 'target_clf':
        preds = torch.sigmoid(torch.tensor(preds)).numpy()
        preds_label = (preds > 0.5).astype(int)
    
    else:
        raise ValueError("please provde a valid task between ['hate_speech', 'target_clf']")
    
    
    accuracy = metrics.accuracy_score(labels, preds_label)
    macro_f1 = metrics.f1_score(labels, preds_label, average = "macro")
    auroc = metrics.roc_auc_score(labels, preds, multi_class='ovr')
    
    return accuracy, macro_f1, auroc


def custom_trainer(model, dataloader, num_epochs, criterion, optimizer, task = None):
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for embeddings, labels in dataloader:
            # Move tensors to the configured device
            
            embeddings = embeddings.to(device)
            labels = labels.to(device)
            

            # Forward pass
            outputs = model(embeddings)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(dataloader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

        # Validation step
        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for embeddings, labels in dataloader:
                embeddings = embeddings.to(device)
                labels = labels.to(device)
                outputs = model(embeddings) 
                
                if task == 'hate_speech':
                    outputs = F.softmax(outputs, dim=1)
                
                    
                all_preds.extend(outputs.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        accuracy, macro_f1, auroc = calculate_metrics(all_preds, all_labels,task=task)
        print(f'Accuracy: {accuracy:.4f}, Macro F1: {macro_f1:.4f},AUROC: {auroc:.4f}')
    
    return model

    

### Train the hate speech classifier

In [45]:
from torch import optim
from torch import nn

criterion = criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(BiRNN_for_hate_class.parameters(), lr=0.001)

BiRNN_for_hate_class = custom_trainer(BiRNN_for_hate_class, dataloader_glove_train, 5,criterion,optimizer,task = 'hate_speech' )

Epoch [1/5], Loss: 0.8574
VAccuracy: 0.6592, Macro F1: 0.6111,AUROC: 0.8187
Epoch [2/5], Loss: 0.8138
VAccuracy: 0.6905, Macro F1: 0.6388,AUROC: 0.8468
Epoch [3/5], Loss: 0.8250
VAccuracy: 0.4876, Macro F1: 0.3469,AUROC: 0.6043
Epoch [4/5], Loss: 0.9862
VAccuracy: 0.5497, Macro F1: 0.4136,AUROC: 0.7290
Epoch [5/5], Loss: 0.8977
VAccuracy: 0.6113, Macro F1: 0.4601,AUROC: 0.7750


### Train target classifier 


In [41]:
from torch import optim
from torch import nn

criterion = nn.BCEWithLogitsLoss() # combines a sigmoid layer and the binary cross-entropy loss in a single class for a good multi-class multi target classification loss
optimizer = optim.Adam(BiRNN_for_target_class.parameters(), lr=0.001)

BiRNN_for_target_class = custom_trainer(BiRNN_for_target_class, dataloader_glove_target_train, 10, criterion,optimizer, task = 'target_clf' )

Epoch [1/10], Loss: 0.2242
VAccuracy: 0.4758, Macro F1: 0.4131,AUROC: 0.8801
Epoch [2/10], Loss: 0.1724
VAccuracy: 0.5634, Macro F1: 0.5537,AUROC: 0.9243
Epoch [3/10], Loss: 0.1525
VAccuracy: 0.6478, Macro F1: 0.6190,AUROC: 0.9489
Epoch [4/10], Loss: 0.1401
VAccuracy: 0.6764, Macro F1: 0.6324,AUROC: 0.9604


KeyboardInterrupt: 

## Test and final pipeline

In [42]:
def evaluate_model(model, dataloader,task):# mode eval
    
    model.to(device)
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for embeddings, labels in dataloader:
            embeddings = embeddings.to(device)
            labels = labels.to(device)
            outputs = model(embeddings) 
                
            if task == 'hate_speech':
                outputs = F.softmax(outputs, dim=1)
                
                    
            all_preds.extend(outputs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy, macro_f1, auroc = calculate_metrics(all_preds, all_labels,task=task)
    print(f'VAccuracy: {accuracy:.4f}, Macro F1: {macro_f1:.4f},AUROC: {auroc:.4f}')
    



### Test Hate classification task

In [None]:
evaluate_model(BiRNN_for_hate_class, dataloader_glove_test, task =  'hate_speech')

### Test Target classification task

In [43]:
evaluate_model(BiRNN_for_target_class, dataloader_glove_target_test, task = 'target_clf')

VAccuracy: 0.5947, Macro F1: 0.5938,AUROC: 0.9277


save model

In [70]:
torch.save(BiRNN_for_hate_class, 'BiRNN_for_hate_class.pth')
torch.save(BiRNN_for_hate_class, 'BiRNN_for_target_class.pth')


## Final wrapper for custom input 

Output hate score and targets

In [67]:
def predict(text,hate_model,target_model, emb_model,  max_len=144, emb_dim = 100):
    # Tokenize the input text

    tokens = nltk_tokenizer(text)
    predict_embeddings = []
    emb_list = []
    
    embeddings = [get_word_embedding(emb_model,token,emb_dim) for token in tokens] 
    
        
    padded_embeddings = pad_sequence(embeddings, max_len, emb_dim)
        
    predict_embeddings.append(padded_embeddings)


    tensor_emb = torch.tensor(predict_embeddings, dtype= torch.float32)
    tensor_emb = tensor_emb.to(device)
    
    predictions = []
    # Perform prediction for hate
    with torch.no_grad():
        outputs = hate_model(tensor_emb)
        proba = F.softmax(outputs,dim=1)
        
        predictions.append(proba.cpu())
    
    # perform prediction for targets
    with torch.no_grad():
        outputs = target_model(tensor_emb)
        
       
        predictions.append(outputs.cpu())
        
    return predictions
    
    

# Example custom input
custom_input = "always thought that nigger was a faggot"

# Predict the class for the custom input
predict = predict(text = custom_input, hate_model = BiRNN_for_hate_class, target_model = BiRNN_for_target_class,emb_model = pretraines_glove_model)


hate_argmax = np.argmax(predict[0][0]).numpy().item()

hate_dict = {0: 'normal', 1:'offensive', 2:'hate speech'}

hate_label = hate_dict[hate_argmax]
hate_label_score = predict[0][0][hate_argmax].numpy().item()


multilabel.classes_

sigmoid = torch.nn.Sigmoid()
probs = sigmoid(predict[1][0].cpu())
preds = np.zeros(probs.shape)
preds[np.where(probs>=0.3)] = 1

target_label = multilabel.inverse_transform(preds.reshape(1,-1))


print(f'Hate score: {hate_label}')

print(f'Probability: {hate_label_score}')
      
print(f'Targets: {target_label}')




Hate score: hate speech
Probability: 0.9415609240531921
Targets: [('African', 'Homosexual')]


### Train 