In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import AdamW

from transformers import BertTokenizer, BertForSequenceClassification
from tqdm import tqdm

## Reading the data

In [2]:
def _load_split(path):
    """Read one tab-separated split and return ``(texts, labels)`` as lists.

    The file's first row is consumed as a header (``header=0``) and the two
    columns are renamed to ``label`` and ``text``.

    Rows missing either field are dropped: a missing text cannot be
    tokenized, and a missing label would turn the whole label column into
    floats, which cannot be used as class indices downstream.

    Parameters
    ----------
    path : str
        Path to the TSV file.

    Returns
    -------
    tuple[list[str], list[int]]
        Texts and their labels, aligned by position.
    """
    frame = pd.read_csv(path, sep='\t', names=['label', 'text'], header=0)
    frame = frame.dropna(subset=['label', 'text'])

    texts = frame['text'].astype(str).tolist()
    # Cast explicitly so the labels are plain ints regardless of how pandas
    # inferred the column dtype.
    labels = frame['label'].astype(int).tolist()
    return texts, labels


texts_train, labels_train = _load_split('metahate_train.tsv')
texts_test, labels_test = _load_split('metahate_test.tsv')


## Loading BERT model

In [3]:
# Checkpoint shared by the tokenizer and the classifier so their vocabularies match.
model_name = 'bert-base-uncased'

tokenizer = BertTokenizer.from_pretrained(model_name)

# num_labels=2 gives the classification head two output logits (binary task).
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Tokenizing the training and testing data

In [6]:
def batch_tokenize(texts, tokenizer, batch_size=1000, max_length=512):
    """Tokenize ``texts`` in chunks and return the concatenated encodings.

    Each chunk of at most ``batch_size`` texts is passed to ``tokenizer``
    with ``padding='max_length'`` and ``truncation=True``, so every row of
    the result has exactly ``max_length`` columns and the per-chunk tensors
    can be concatenated along the first dimension.

    Parameters
    ----------
    texts : list[str]
        Texts to encode.
    tokenizer : callable
        Called as ``tokenizer(batch, padding=..., truncation=...,
        max_length=..., return_tensors='pt')``; must return a mapping with
        ``'input_ids'`` and ``'attention_mask'`` tensors.
    batch_size : int, default 1000
        Number of texts sent to the tokenizer per call.
    max_length : int, default 512
        Fixed sequence length every text is padded or truncated to. Memory
        use of the result grows linearly with this value, so lower it when
        the texts are short.

    Returns
    -------
    dict[str, torch.Tensor]
        ``'input_ids'`` and ``'attention_mask'``, each with one row per text.
        For empty ``texts`` both are empty ``(0, max_length)`` long tensors.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # torch.cat refuses an empty list, so handle "no texts" explicitly.
    if len(texts) == 0:
        return {
            'input_ids': torch.empty((0, max_length), dtype=torch.long),
            'attention_mask': torch.empty((0, max_length), dtype=torch.long),
        }

    input_ids = []
    attention_masks = []

    for start in range(0, len(texts), batch_size):
        encoded = tokenizer(
            texts[start:start + batch_size],
            padding='max_length',
            truncation=True,
            max_length=max_length,
            return_tensors='pt'
        )
        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])

    return {
        'input_ids': torch.cat(input_ids),
        'attention_mask': torch.cat(attention_masks)
    }

# Encode both splits with the same tokenizer settings (train first, then test).
X_train_tokens, X_test_tokens = (
    batch_tokenize(split_texts, tokenizer)
    for split_texts in (texts_train, texts_test)
)

## Converting the training and testing labels to PyTorch tensors

In [9]:
# Pin the dtype instead of letting torch infer it from the list contents:
# the labels are used as class indices by the loss, which requires int64.
# A NaN label fails loudly here rather than silently producing a float tensor.
y_train_tensor = torch.tensor(labels_train, dtype=torch.long)
y_test_tensor = torch.tensor(labels_test, dtype=torch.long)

## Defining class weights for imbalanced classes

In [10]:
# Per-class loss weights, indexed by label id. Hate texts (the second entry)
# make up only 20.64% of the data, so mistakes on them are weighted more
# heavily than mistakes on the majority class.
non_hate_weight, hate_weight = 0.63, 2.42
class_weights = torch.tensor([non_hate_weight, hate_weight])

# Cross-entropy that rescales each sample's loss by the weight of its true class.
loss_function = nn.CrossEntropyLoss(weight=class_weights)

## Creating PyTorch TensorDatasets and DataLoaders for training and testing sets

In [11]:
def _as_dataset(tokens, targets):
    """Bundle token ids, attention masks and labels into one TensorDataset."""
    return TensorDataset(tokens['input_ids'], tokens['attention_mask'], targets)


train_dataset = _as_dataset(X_train_tokens, y_train_tensor)
test_dataset = _as_dataset(X_test_tokens, y_test_tensor)

# Only the training batches are shuffled; the test order is kept so that
# predictions stay aligned with the test labels.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32)

## Training

In [13]:
# Pick the GPU when one is available and move the model there first, so the
# optimizer below is constructed over the parameters as they will be trained.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# AdamW over all model parameters with a learning rate of 5e-5.
optimizer = AdamW(model.parameters(), lr=5e-5)

# Moving the criterion (and its class-weight tensor) is loop-invariant, so it
# is done once here instead of on every batch.
loss_function = loss_function.to(device)

num_epochs = 3

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    # mininterval throttles progress-bar refreshes. The recorded run hit the
    # Jupyter IOPub rate limit during a multi-hour epoch; fewer refreshes
    # should reduce output traffic (NOTE(review): confirm this avoids the
    # throttle in your environment).
    for batch in tqdm(train_dataloader, desc=f'Epoch {epoch + 1}/{num_epochs}', mininterval=10):
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()

        # Labels are deliberately not passed to the model: the loss that
        # drives training is the class-weighted criterion applied to the
        # logits, so a loss computed inside the model would go unused.
        outputs = model(input_ids, attention_mask=attention_mask)
        loss = loss_function(outputs.logits, labels)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    average_loss = total_loss / len(train_dataloader)
    print(f'Training Loss: {average_loss}')

Epoch 1/3: 100%|██████████| 27530/27530 [4:55:54<00:00,  1.55it/s]  


Training Loss: 0.3320641086731462


Epoch 2/3:  43%|████▎     | 11899/27530 [2:07:41<2:47:39,  1.55it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



## Running inference on the test set

In [14]:
# Switch to evaluation mode and collect one predicted class id per test
# example, in dataloader order.
model.eval()
all_predictions = []

with torch.no_grad():  # no gradients are needed for inference
    for batch in tqdm(test_dataloader, desc='Evaluating'):
        # Only the model inputs are moved to the device; the labels in the
        # batch are not needed to produce predictions.
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row.
        predictions = torch.argmax(outputs.logits, dim=1).cpu().numpy()
        all_predictions.extend(predictions)

Evaluating: 100%|██████████| 6883/6883 [24:54<00:00,  4.61it/s]


## Computing evaluation metrics

In [16]:
# Compare the collected predictions against the test labels.
accuracy = accuracy_score(labels_test, all_predictions)
report = classification_report(labels_test, all_predictions)

# F1 under the three averaging schemes, computed in the order
# weighted -> micro -> macro.
weighted_f1, micro_f1, macro_f1 = (
    f1_score(labels_test, all_predictions, average=averaging)
    for averaging in ('weighted', 'micro', 'macro')
)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)
print(f"Weighted F1 Score: {weighted_f1}")
print(f"Micro F1 Score: {micro_f1}")
print(f"Macro F1 Score: {macro_f1}")

Accuracy: 0.875286628252805
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.88      0.92    173537
           1       0.66      0.85      0.74     46696

    accuracy                           0.88    220233
   macro avg       0.81      0.87      0.83    220233
weighted avg       0.89      0.88      0.88    220233

Weighted F1 Score: 0.8807480468970654
Micro F1 Score: 0.875286628252805
Macro F1 Score: 0.830731310272037
