BERT for hate speech detection

## Librería Transformers

In [None]:
!pip install transformers
!pip install sentencepiece
!pip install ipywidgets
!pip install gdown
!pip install accelerate
!jupyter nbextension enable --py widgetsnbextension

Collecting transformers
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m52.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m92.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m80.1 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
import torch
import transformers
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

from transformers import Trainer, TrainingArguments
from transformers import BertTokenizer
from transformers import BertPreTrainedModel, BertModel

import pandas as pd
import numpy as np
import os

from sklearn.metrics import classification_report

if not torch.cuda.is_available():
  DEVICE = 'cpu'
else:
  DEVICE = 'cuda:0'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

%cd '/content/drive/MyDrive/Colab Notebooks/'
%mkdir './transformers'
%cd './transformers'

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks
mkdir: cannot create directory ‘./transformers’: File exists
/content/drive/MyDrive/Colab Notebooks/transformers


## Hate Speech Classification

Dataset

Para la tarea de clasificación de discursos de odio, trabajaremos con el [Offensive Language Identification Dataset - OLID ](https://scholar.harvard.edu/malmasi/olid). Es un conjunto de datos de tweets anotados jerárquicamente en tres niveles:

* Nivel A: Detección de Lenguaje Ofensivo
* Nivel B: Categorización del Lenguaje Ofensivo
* Nivel C: Identificación del Objetivo del Lenguaje Ofensivo



In [None]:
%mkdir -p ./data
%cd ./data

if not os.path.isfile('pretrain.txt'):
  !wget -O pretrain.txt https://www.dropbox.com/s/bavjtyx0ndty7xt/pretrain.txt?dl=0

if not os.path.isfile('OLIDv1.0.zip'):
  !gdown --id 1Tksi8UyzW-drFWd7maGr7MoHVa-VHQCO -O OLIDv1.0.zip
  !unzip OLIDv1.0.zip

%cd ..


/content/drive/MyDrive/Colab Notebooks/transformers/data
/content/drive/MyDrive/Colab Notebooks/transformers


In [None]:
%pwd
%ls

[0m[01;34mdata[0m/


El conjunto de datos ``OLID`` fue etiquetado para tres subtareas, por lo tanto, tenemos tres diferentes conjuntos de etiquetas por tweet:
* Tarea A: No Ofensivo (``NOT``) y Ofensivo (``OFF``).
* Tarea B: Insulto Dirigido (``TIN``), No Dirigido (``UNT``) y ``NULL`` para tweets no ofensivos.
* Tarea C: Individuo (``IND``), Grupo (``GRP``), Otro (``OTH``) y ``NULL`` para tweets no ofensivos y no dirigidos.


In [None]:
# Load the tab-separated OLID training file for a quick visual inspection.
# NOTE(review): the preview shows blanks where reader_train expects the literal
# string 'NULL' -- presumably pandas parses 'NULL' as a missing value; confirm.
df = pd.read_csv('data/olid-training-v1.0.tsv',delimiter="\t")

print(f'Number of training samples: {len(df)}')

# Show the first rows: id, tweet and the three subtask label columns.
df.head()

Number of training samples: 13240


Unnamed: 0,id,tweet,subtask_a,subtask_b,subtask_c
0,86426,@USER She should ask a few native Americans wh...,OFF,UNT,
1,90194,@USER @USER Go home you’re drunk!!! @USER #MAG...,OFF,TIN,IND
2,16820,Amazon is investigating Chinese employees who ...,NOT,,
3,62688,"@USER Someone should'veTaken"" this piece of sh...",OFF,UNT,
4,43605,@USER @USER Obama wanted liberals &amp; illega...,NOT,,


Preprocesando el Corpus

Let's define ``reader_train`` and ``reader_test`` that will prepare our data corpus and labels for both train and test set.

In [None]:
def reader_train(file_name):
    """Read the OLID training TSV and encode its labels as integers.

    The file is expected to have one header row followed by rows of
    ``id<TAB>tweet<TAB>subtask_a<TAB>subtask_b<TAB>subtask_c``.

    Args:
        file_name: Path of the tab-separated training file.

    Returns:
        A dict with two parallel lists:
        ``'texts'``  -- the tweet strings (rows with an empty tweet are dropped);
        ``'labels'`` -- ``[label_a, label_b, label_c]`` per tweet, where each
        value is the index of the label in the corresponding label set below.

    Raises:
        ValueError: If a row carries a label outside the known label sets.
        IndexError: If a non-blank row has fewer than five columns.
    """
    # Index in each list is the integer class id used by the models.
    set_a = ['NOT', 'OFF']
    set_b = ['NULL', 'TIN', 'UNT']
    set_c = ['NULL', 'IND', 'GRP', 'OTH']

    texts = []
    labels = []

    # `with` guarantees the handle is closed even when a row fails to parse;
    # the explicit encoding keeps tweets readable regardless of the locale.
    with open(file_name, encoding='utf-8') as fin:
        fin.readline()  # skip the header row
        for line in fin:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            items = line.split('\t')
            text = items[1]
            label_a = set_a.index(items[2].strip())
            label_b = set_b.index(items[3].strip())
            label_c = set_c.index(items[4].strip())

            if len(text) > 0:
                texts.append(text)
                labels.append([label_a, label_b, label_c])

    return {'texts': texts, 'labels': labels}

In [None]:
def reader_test(test_textlist, test_labellist):
    """Read the OLID test split from per-task text and label files.

    Args:
        test_textlist: Paths of tab-separated files with one header row
            followed by ``id<TAB>tweet`` rows. Files are read in order and the
            first text seen for an id is kept.
        test_labellist: Paths of comma-separated ``id,label`` files, ordered
            Task A, Task B, Task C. A first line whose second field is not a
            known label is treated as a header and skipped.

    Returns:
        A dict with two parallel lists:
        ``'texts'``  -- the tweet strings;
        ``'labels'`` -- ``[label_a, label_b, label_c]`` per tweet as integer
        class ids. A tweet without an entry in a label file falls back to
        ``'OFF'`` for Task A and ``'NULL'`` for Tasks B and C.

    Raises:
        ValueError: If a label file contains a label outside the known sets.
    """
    # Index in each list is the integer class id used by the models.
    set_a = ['NOT', 'OFF']
    set_b = ['NULL', 'TIN', 'UNT']
    set_c = ['NULL', 'IND', 'GRP', 'OTH']
    known_labels = set(set_a) | set(set_b) | set(set_c)

    # id -> tweet text, in first-seen order across all text files.
    text_dict = {}
    for file_text in test_textlist:
        with open(file_text, encoding='utf-8') as fin:
            fin.readline()  # skip the header row
            for line in fin:
                items = line.split('\t')
                if len(items) < 2:
                    continue  # blank or malformed row
                if items[0] not in text_dict:
                    text_dict[items[0]] = items[1]

    # One id -> label mapping per label file.
    label_dict_list = []
    for file_label in test_labellist:
        label_dict = {}
        with open(file_label, encoding='utf-8') as fin:
            for line_no, line in enumerate(fin):
                items = line.split(',')
                if len(items) < 2:
                    continue  # blank or malformed row
                label = items[1].strip()
                # Only drop the first line when it really is a header;
                # unconditionally skipping it would lose the first label of a
                # header-less file (NOTE(review): the OLID label CSVs appear to
                # ship without a header -- confirm against the data).
                if line_no == 0 and label not in known_labels:
                    continue
                label_dict[items[0]] = label
        label_dict_list.append(label_dict)

    def label_for(task, idx, default):
        """Look up the label of `idx` for task number `task`, or `default`."""
        if task < len(label_dict_list):
            return label_dict_list[task].get(idx, default)
        return default

    texts = []
    labels = []
    for idx, text in text_dict.items():
        if len(text) > 0:
            texts.append(text)
            label_a = set_a.index(label_for(0, idx, 'OFF').strip())
            label_b = set_b.index(label_for(1, idx, 'NULL').strip())
            label_c = set_c.index(label_for(2, idx, 'NULL').strip())
            labels.append([label_a, label_b, label_c])

    return {'texts': texts, 'labels': labels}


In [None]:
class OlidDataset(torch.utils.data.Dataset):
    """Map-style dataset over OLID tweets with their three task labels.

    Items are returned as raw text plus integer labels; tokenization happens
    per batch in `collate_fn`, so padding only extends to the longest tweet of
    each batch.
    """

    LABEL_KEYS = ('label_a', 'label_b', 'label_c')

    def __init__(self, tokenizer, input_set, max_length=128):
        """Store the tokenizer and the parsed corpus.

        Args:
            tokenizer: Callable used in `collate_fn` to encode a list of texts;
                it is called with ``return_tensors='pt'``, ``padding=True``,
                ``truncation=True`` and ``max_length``.
            input_set: Dict with ``'texts'`` (list of str) and ``'labels'``
                (list of ``[label_a, label_b, label_c]``), as produced by
                `reader_train` / `reader_test`.
            max_length: Maximum number of tokens kept per tweet; longer tweets
                are truncated. Defaults to 128.
        """
        self.tokenizer = tokenizer
        self.texts = input_set['texts']
        self.labels = input_set['labels']
        self.max_length = max_length

    def collate_fn(self, batch):
        """Tokenize a list of items and attach the label tensors.

        Args:
            batch: List of dicts as returned by `__getitem__`.

        Returns:
            The tokenizer's output with three extra entries, ``'label_a'``,
            ``'label_b'`` and ``'label_c'``, each a tensor with one label per
            item of the batch.
        """
        texts = [sample['text'] for sample in batch]

        # Tweets longer than `max_length` tokens are truncated. `padding=True`
        # pads to the longest sequence *in this batch*, not to `max_length`.
        encodings = self.tokenizer(texts, return_tensors='pt', padding=True,
                                   truncation=True, max_length=self.max_length)

        for key in self.LABEL_KEYS:
            encodings[key] = torch.tensor([sample[key] for sample in batch])

        return encodings

    def __len__(self):
        """Return the number of tweets."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tweet at `idx` as a dict of text and the three labels."""
        label_a, label_b, label_c = self.labels[idx][:3]
        return {'text': self.texts[idx],
                'label_a': label_a,
                'label_b': label_b,
                'label_c': label_c}

In [None]:
# Load the tokenizer that matches the 'bert-base-cased' checkpoint used for the models below.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Display the tokenizer's configuration (last expression of the cell).
tokenizer

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

BertTokenizer(name_or_path='bert-base-cased', vocab_size=28996, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [None]:
# Parse both splits into {'texts': [...], 'labels': [...]} dicts.
# The test split comes as one text file and one label file per task level (a, b, c).
trainset = reader_train('./data/olid-training-v1.0.tsv')
testset = reader_test(['./data/testset-levela.tsv','./data/testset-levelb.tsv','./data/testset-levelc.tsv'],
                      ['./data/labels-levela.csv','./data/labels-levelb.csv','./data/labels-levelc.csv'])

# Wrap the parsed splits; tokenization is deferred to OlidDataset.collate_fn.
train_dataset = OlidDataset(tokenizer, trainset)
test_dataset = OlidDataset(tokenizer, testset)

The following code lets you play around with our ``train_dataset`` object.

In [None]:
# Uncomment to show the first item as a dictionary.
#print(train_dataset[0])

# Materialize every training item in a list; only the first 10 are collated below.
batch = [sample for sample in train_dataset]

# Tokenize the first 10 items and attach their label tensors.
encodings = train_dataset.collate_fn(batch[:10])

# Print each tensor of the batch as a plain nested list.
for key, value in encodings.items():
  print(f"{key}: {value.numpy().tolist()}")



input_ids: [[101, 137, 1646, 9637, 1153, 1431, 2367, 170, 1374, 2900, 4038, 1184, 1147, 1321, 1113, 1142, 1110, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 137, 1646, 9637, 137, 1646, 9637, 3414, 1313, 1128, 787, 1231, 6882, 106, 106, 106, 137, 1646, 9637, 108, 9960, 10583, 108, 8499, 10973, 10973, 100, 158, 20550, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 9786, 1110, 11950, 1922, 4570, 1150, 1132, 4147, 4422, 2233, 1106, 1503, 118, 1710, 18275, 1116, 1702, 1111, 1126, 2652, 1107, 1103, 6591, 24210, 119, 158, 20550, 108, 9786, 108, 9960, 10583, 108, 148, 22689, 108, 24890, 11607, 1592, 108, 157, 15678, 1942, 102, 0, 0, 0], [101, 107, 137, 1646, 9637, 6518, 1431, 112, 1396, 1942, 9899, 1179, 107, 107, 1142, 2727, 1104, 4170, 1106, 170, 15406, 119, 100, 107, 107, 107, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 137, 1646, 9637, 137, 1646, 9637, 7661, 1458, 7691, 1116, 111, 1

### Finetuning a pre-trained BERT model

In [None]:
# Load the plain pre-trained encoder (no task head) to inspect its architecture.
model = BertModel.from_pretrained("bert-base-cased")

# Roughly 108M parameters (see the printed value).
print(f"Model size: {model.num_parameters()}")

# Display the module tree (last expression of the cell).
model

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Model size: 108310272


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(28996, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
  

Note that the model has only encoder layers.

#### BERT Model

Para definir nuestro modelo, construiremos a partir de un modelo preentrenado de Huggingface y lo adaptaremos a nuestra tarea. Utilizaremos BertModel para extraer incrustaciones y añadiremos una capa Lineal para clasificar las muestras. La implementación de BERT de Hugging face puede manejar diferentes variaciones del modelo, las cuales definimos y pasamos sus valores de parámetros mediante config.

El código a continuación define un modelo adaptado para clasificar tweets en el Nivel A, Detección de Lenguaje Ofensivo. Implementaremos la Tarea B y C más adelante.



The code below defines a model adapted to classify tweets on Level A, Offensive Language Detection. We will implement Task B and C later.



In [None]:
class BERT_hate_speech(BertPreTrainedModel):
    """BERT encoder with one classification head for OLID Task A.

    The head is a dropout layer followed by a 2-way linear layer applied to
    element 1 of the encoder output (NOTE(review): presumably the pooled
    sentence representation of Hugging Face's BertModel -- confirm).
    """

    def __init__(self, config):
        """Build the encoder and the Task A head from a model `config`."""
        super().__init__(config)

        # Shared encoder
        self.bert = BertModel(config)

        # Task A head: dropout, then a projection onto the 2 classes
        head_a_layers = [torch.nn.Dropout(0.2),
                         torch.nn.Linear(config.hidden_size, 2)]
        self.projection_a = torch.nn.Sequential(*head_a_layers)

        # Task B
        # TODO

        # Task C
        # TODO

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None):
        """Return the Task A logits for a batch.

        All arguments except `labels` are forwarded unchanged to the encoder;
        `labels` is accepted but not used here (the loss is computed by the
        trainer).
        """
        encoder_kwargs = dict(
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        encoder_outputs = self.bert(input_ids, **encoder_kwargs)

        # Element 1 of the encoder output feeds the classification head.
        sentence_repr = encoder_outputs[1]

        return self.projection_a(sentence_repr)


#### Finetuning

Vamos a especializar la clase [``Trainer``](https://huggingface.co/transformers/main_classes/trainer.html#transformers.Trainer) para poder usar funciones de pérdida personalizadas.


We build our custom ``Trainer`` class to incorporate our own ``compute_loss`` function over the three labels.

In [None]:

class Trainer_hate_speech(Trainer):
    """Trainer that optimizes the Task A cross-entropy loss only."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the Task A loss for one batch.

        Args:
            model: Model returning Task A logits for ``model(**inputs)``.
            inputs: Batch produced by ``OlidDataset.collate_fn``. The label
                entries are removed from it before the forward pass because
                they are not model inputs.
            return_outputs: When True, return ``(loss, logits)`` instead of
                the loss alone, mirroring the base ``Trainer.compute_loss``
                signature.
            num_items_in_batch: Accepted for compatibility with Trainer
                versions that pass it (NOTE(review): confirm against the
                installed transformers release); not used.

        Returns:
            The scalar loss, or ``(loss, logits)`` when `return_outputs` is True.
        """
        labels_a = inputs.pop('label_a')
        # Task B / C labels are present in every batch but unused by this model.
        inputs.pop('label_b', None)
        inputs.pop('label_c', None)

        logits_a = model(**inputs)

        # TASK A
        loss_task_a = nn.CrossEntropyLoss()
        loss = loss_task_a(logits_a.view(-1, 2), labels_a.view(-1))

        return (loss, logits_a) if return_outputs else loss

In [None]:
def main_hate_speech():
    """Fine-tune the Task A model on `train_dataset` and save it to disk.

    Starts from the 'bert-base-cased' checkpoint, trains with
    `Trainer_hate_speech` and writes the result to
    './models/ht_bert_finetuned/'.
    """
    # Our custom model, initialized from an available pre-trained checkpoint.
    classifier = BERT_hate_speech.from_pretrained("bert-base-cased")

    hyperparams = dict(
        output_dir='./experiment/hate_speech',
        learning_rate=0.0001,  # 2e-5
        logging_steps=100,
        per_device_train_batch_size=32,
        num_train_epochs=3,
        # Keep the raw 'text' / 'label_*' entries so the custom collate_fn receives them.
        remove_unused_columns=False,
    )
    run_config = TrainingArguments(**hyperparams)

    finetuner = Trainer_hate_speech(
        model=classifier,
        args=run_config,
        train_dataset=train_dataset,
        data_collator=train_dataset.collate_fn,
    )
    finetuner.train()
    finetuner.save_model('./models/ht_bert_finetuned/')



In [None]:
# Run Task A fine-tuning; the trained model is saved to ./models/ht_bert_finetuned/.
main_hate_speech()

Some weights of BERT_hate_speech were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['projection_a.1.weight', 'projection_a.1.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6334


#### Evaluación

Once we have trained our model, we can evaluate it on our test set.

In [None]:
def predict_hatespeech(input, tokenizer, model):
  """Classify one text or a list of texts with the Task A model.

  Args:
    input: A string or a list of strings.
    tokenizer: Callable returning a mapping of tensors for `input`.
    model: Module returning Task A logits of shape (batch, classes) for
      ``model(**encodings)``.

  Returns:
    A dict with CPU tensors holding one entry per input text:
    ``'prediction'`` -- index of the most likely class;
    ``'confidence'`` -- softmax probability of that class (a value in (0, 1],
    rather than the raw logit).
  """
  model.eval()
  encodings = tokenizer(input, return_tensors='pt', padding=True, truncation=True, max_length=128)

  # Run the forward pass on whatever device the model lives on.
  device = next(model.parameters()).device
  model_inputs = {name: tensor.to(device) for name, tensor in encodings.items()}

  # Inference only: no autograd graph is needed.
  with torch.no_grad():
    logits = model(**model_inputs)
    probabilities = torch.softmax(logits, dim=1)

  confidence, prediction = torch.max(probabilities, 1)

  return {'prediction': prediction.cpu(), 'confidence': confidence.cpu()}

In [None]:
def evaluate(model, tokenizer, data_loader):
  """Evaluate the Task A model and return scikit-learn's classification report.

  Args:
    model: Fine-tuned Task A model, passed on to `predict_hatespeech`.
    tokenizer: Tokenizer passed on to `predict_hatespeech`.
    data_loader: Iterable of batches exposing ``'text'`` (the tweets) and
      ``'label_a'`` (a tensor of gold Task A labels).

  Returns:
    The dict produced by ``classification_report(..., output_dict=True)`` with
    the classes named "Not offensive" and "Offensive".
  """
  preds = []
  tot_labels = []

  with torch.no_grad():
    for data in tqdm(data_loader):
      pred = predict_hatespeech(data['text'], tokenizer, model)

      # extend (not append) keeps both lists flat, so batches of any size --
      # including a smaller last batch -- are scored sample by sample.
      preds.extend(pred['prediction'].tolist())
      tot_labels.extend(data['label_a'].tolist())

  # with the saved predictions and labels we can compute accuracy, precision, recall and f1-score
  report = classification_report(tot_labels, preds, target_names=["Not offensive","Offensive"], output_dict= True)

  return report

In [None]:
# Reload the tokenizer and the fine-tuned Task A model from disk.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Directory written by main_hate_speech(); change it to evaluate another saved model.
model_name = './models/ht_bert_finetuned/'
model = BERT_hate_speech.from_pretrained(model_name)

# The loader is created with DataLoader's defaults (no batch size or collate function given).
test_loader = DataLoader(test_dataset)

report = evaluate(model, tokenizer, test_loader)

print(report)

# Headline numbers: overall accuracy and the per-class F1 scores.
print(report['accuracy'])
print(report['Not offensive']['f1-score'])
print(report['Offensive']['f1-score'])

In [None]:
# Reload tokenizer and fine-tuned Task A model so this cell can run on its own.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
model = BERT_hate_speech.from_pretrained('./models/ht_bert_finetuned/')

# Try the classifier on two hand-written sentences.
print(predict_hatespeech("I go see pinguins at the zoo.", tokenizer, model))
print(predict_hatespeech("Bananas are stupid", tokenizer, model))

## Multi-task Hate Speech Classification

### Multi-task Model

In [None]:

class BERT_hate_speech_multitask(BertPreTrainedModel):
    """BERT encoder shared by three classification heads (OLID Tasks A, B, C).

    Each head is a dropout layer followed by a linear layer with 2, 3 and 4
    outputs respectively, applied to element 1 of the encoder output
    (NOTE(review): presumably the pooled sentence representation of Hugging
    Face's BertModel -- confirm).
    """

    def __init__(self, config):
        """Build the shared encoder and the three task heads from `config`."""
        super().__init__(config)

        # Shared encoder
        self.bert = BertModel(config)

        def make_head(num_classes):
            """Dropout followed by a projection onto `num_classes` logits."""
            return torch.nn.Sequential(
                torch.nn.Dropout(0.2),
                torch.nn.Linear(config.hidden_size, num_classes),
            )

        # One head per task: A has 2 classes, B has 3, C has 4.
        self.projection_a = make_head(2)
        self.projection_b = make_head(3)
        self.projection_c = make_head(4)

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None):
        """Return ``(logits_a, logits_b, logits_c)`` for a batch.

        All arguments except `labels` are forwarded unchanged to the encoder;
        `labels` is accepted but not used here (the loss is computed by the
        trainer).
        """
        encoder_kwargs = dict(
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        encoder_outputs = self.bert(input_ids, **encoder_kwargs)

        # Element 1 of the encoder output feeds all three heads.
        sentence_repr = encoder_outputs[1]

        task_heads = (self.projection_a, self.projection_b, self.projection_c)
        return tuple(head(sentence_repr) for head in task_heads)

In [None]:
class Trainer_hate_speech_multitask(Trainer):
    """Trainer that optimizes the sum of the Task A, B and C cross-entropy losses."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the summed multi-task loss for one batch.

        Args:
            model: Model returning ``(logits_a, logits_b, logits_c)`` for
                ``model(**inputs)``.
            inputs: Batch produced by ``OlidDataset.collate_fn``. The three
                label entries are removed from it before the forward pass
                because they are not model inputs.
            return_outputs: When True, return ``(loss, outputs)`` instead of
                the loss alone, mirroring the base ``Trainer.compute_loss``
                signature.
            num_items_in_batch: Accepted for compatibility with Trainer
                versions that pass it (NOTE(review): confirm against the
                installed transformers release); not used.

        Returns:
            The scalar loss ``loss_a + loss_b + loss_c``, or
            ``(loss, outputs)`` when `return_outputs` is True.
        """
        labels = {name: inputs.pop(name) for name in ('label_a', 'label_b', 'label_c')}

        outputs = model(**inputs)
        (out_a, out_b, out_c) = outputs

        criterion = nn.CrossEntropyLoss()

        # Number of classes per task: A=2, B=3, C=4.
        loss_a = criterion(out_a.view(-1, 2), labels['label_a'].view(-1))
        loss_b = criterion(out_b.view(-1, 3), labels['label_b'].view(-1))
        loss_c = criterion(out_c.view(-1, 4), labels['label_c'].view(-1))

        # Unweighted sum: every task contributes equally.
        loss = loss_a + loss_b + loss_c

        return (loss, outputs) if return_outputs else loss

In [None]:
def main_hate_speech_multitask():
    """Fine-tune the multi-task model on `train_dataset` and save it to disk.

    Starts from the 'bert-base-cased' checkpoint, trains with
    `Trainer_hate_speech_multitask` and writes the result to
    './models/ht_bert_multi_finetuned/'.
    """
    classifier = BERT_hate_speech_multitask.from_pretrained("bert-base-cased")

    hyperparams = dict(
        output_dir='./experiment/hate_speech_multitask',
        learning_rate=0.0001,
        logging_steps=100,
        num_train_epochs=3,
        per_device_train_batch_size=64,
        # Keep the raw 'text' / 'label_*' entries so the custom collate_fn receives them.
        remove_unused_columns=False,
    )
    run_config = TrainingArguments(**hyperparams)

    finetuner = Trainer_hate_speech_multitask(
        model=classifier,
        args=run_config,
        train_dataset=train_dataset,
        data_collator=train_dataset.collate_fn,
    )
    finetuner.train()
    finetuner.save_model('./models/ht_bert_multi_finetuned/')

In [None]:
# Run multi-task fine-tuning; the trained model is saved to ./models/ht_bert_multi_finetuned/.
main_hate_speech_multitask()

### Evaluación

In [None]:
def predict_hatespeech_multitask(input, tokenizer, model):
  """Classify one text or a list of texts on Tasks A, B and C.

  Args:
    input: A string or a list of strings.
    tokenizer: Callable returning a mapping of tensors for `input`.
    model: Module returning three logit tensors, one per task, each of shape
      (batch, classes), for ``model(**encodings)``.

  Returns:
    A dict of two 3-tuples of CPU tensors, ordered Task A, B, C:
    ``'predictions'`` -- index of the most likely class per input text;
    ``'confidences'`` -- softmax probability of that class (a value in (0, 1],
    rather than the raw logit).
  """
  model.eval()
  encodings = tokenizer(input, return_tensors='pt', padding=True, truncation=True, max_length=128)

  # Run the forward pass on whatever device the model lives on.
  device = next(model.parameters()).device
  model_inputs = {name: tensor.to(device) for name, tensor in encodings.items()}

  # Inference only: no autograd graph is needed.
  with torch.no_grad():
    task_logits = model(**model_inputs)
    best = [torch.max(torch.softmax(logits, dim=1), 1) for logits in task_logits]

  preds = tuple(values_indices[1].cpu() for values_indices in best)
  scores = tuple(values_indices[0].cpu() for values_indices in best)

  return {'predictions':preds, 'confidences':scores}

In [None]:
def evaluate_multitask(model, tokenizer, data_loader):
  """Compute the accuracy of the multi-task model on Tasks A, B and C.

  Args:
    model: Fine-tuned multi-task model, passed on to
      `predict_hatespeech_multitask`.
    tokenizer: Tokenizer passed on to `predict_hatespeech_multitask`.
    data_loader: Iterable of batches exposing ``'text'`` and the label tensors
      ``'label_a'``, ``'label_b'`` and ``'label_c'``.

  Returns:
    A list of three accuracies in [0, 1], ordered Task A, B, C (0-dim tensors
    when at least one sample was evaluated, zeros for an empty loader).
  """
  task_labels = ('label_a', 'label_b', 'label_c')
  task_num = len(task_labels)

  total_count = 0
  correct_count = [0] * task_num
  accuracies = [0] * task_num

  with torch.no_grad():
    for data in tqdm(data_loader):
      pred = predict_hatespeech_multitask(data['text'], tokenizer, model)
      preds = pred['predictions']

      for i, label in enumerate(task_labels):
        gold = data[label].to(preds[i].device)
        # Count correct samples (not a per-batch mean) so that batches of any
        # size, including a smaller last batch, are weighted correctly.
        correct_count[i] += (preds[i] == gold).sum()

      # Use the real number of samples in this batch rather than the
      # loader's nominal batch size.
      total_count += len(data['label_a'])

  if total_count == 0:
    # Nothing was evaluated; avoid dividing by zero.
    return accuracies

  for i in range(task_num):
    accuracies[i] = (correct_count[i]/total_count)

  return accuracies

In [None]:

# Reload the fine-tuned multi-task model (written by main_hate_speech_multitask) and its tokenizer.
model = BERT_hate_speech_multitask.from_pretrained("./models/ht_bert_multi_finetuned/")
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# The loader is created with DataLoader's defaults (no batch size or collate function given).
test_loader = DataLoader(test_dataset)

# One accuracy per task, ordered label_a, label_b, label_c.
accuracies = evaluate_multitask(model, tokenizer, test_loader)


In [None]:
# Task indices are 0-based: 0, 1, 2 correspond to label_a, label_b, label_c in evaluate_multitask.
for i in range(3):
    print('Task %d accuracy: %2.2f %%' % (i, 100.0*accuracies[i]))


In [None]:
# Try the multi-task classifier on two hand-written sentences; each result is a
# tuple of predicted class indices ordered Task A, B, C.
print(predict_hatespeech_multitask("I go see pinguins at the zoo.", tokenizer, model)['predictions'])
print(predict_hatespeech_multitask("Bananas are so stupid ", tokenizer, model)['predictions'])