The goal of this project is to use the pretrained RoBERTa transformer as a feature extractor with a costum classification head to determine if text messages are offensive or not.

In [1]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers import AutoTokenizer, AutoModel, AutoConfig

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import sys
from pathlib import Path

# Add src/ to path (once, so imports work)
sys.path.append(str(Path().resolve().parent / "src"))

from paths import DATA_CLEANED
from paths import DATA_PROCESSED

## Using RoBERTa as a feature extractor with a costum classification head

Found this pretrained model online: cardiffnlp/twitter-roberta-base-sentiment-latest (https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest)

It is already pretrained on twitter messages. 

Load model

In [2]:
model_name = 'roberta-base'

tokenizer = AutoTokenizer.from_pretrained(model_name)
roberta = AutoModel.from_pretrained(model_name)

roberta.eval()

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(50265, 768, padding_idx=1)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): RobertaEncoder(
    (layer): ModuleList(
      (0-11): 12 x RobertaLayer(
        (attention): RobertaAttention(
          (self): RobertaSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): RobertaSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dr

Classification head

In [3]:
class CustomClassifier(nn.Module):
    def __init__(self, model_name, pooling='cls'):
        super().__init__()
        self.pooling = pooling.lower()
        self.base = AutoModel.from_pretrained(model_name)
        config = AutoConfig.from_pretrained(model_name)
        hidden_size = config.hidden_size  # Dynamically get the model's hidden size

        # Freeze all parameters of the base model
        for param in self.base.parameters():
            param.requires_grad = False

        # Custom classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, 2)
        )

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.base(input_ids=input_ids, attention_mask=attention_mask)

        # Pooling strategy: either CLS token or mean pooling over token embeddings
        if self.pooling == 'mean':
            token_embeddings = outputs.last_hidden_state
            input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size())
            sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
            sum_mask = input_mask_expanded.sum(1).clamp(min=1e-9)
            pooled = sum_embeddings / sum_mask
        else:
            pooled = outputs.last_hidden_state[:, 0, :]  # CLS token

        logits = self.classifier(pooled)

        # If labels are provided, calculate the loss
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits, labels)
            return logits, loss

        return logits

## Load HASOC dataset for training, validation and testing

In [11]:
# Load training and test data
train_df = pd.read_csv(DATA_CLEANED / "hasoc_2019_en_train_cleaned.tsv", sep='\t')
test_df = pd.read_csv(DATA_PROCESSED / "hasoc_2019_en_test.tsv", sep='\t')

# here we are just using the labels of the first task, which is a binary classification
label = "task_1"

# Automatically map string labels to integers
label_list = sorted(train_df[label].unique())
label_map = {label: idx for idx, label in enumerate(label_list)}

train_df[label] = train_df[label].map(label_map)
test_df[label] = test_df[label].map(label_map)

# Custom Dataset Class
class HateSpeechDataset(Dataset):
    def __init__(self, df, tokenizer, label = 'label', max_len=128):
        self.texts = df["text"].tolist()
        self.labels = df[label].tolist()
        self.encodings = tokenizer(self.texts, padding=True, truncation=True, max_length=max_len)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Create PyTorch Datasets and DataLoaders
train_dataset = HateSpeechDataset(train_df, tokenizer, label=label)
test_dataset = HateSpeechDataset(test_df, tokenizer, label=label)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

## Training and evaluation of model

In [12]:
# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize model
model = CustomClassifier(model_name, pooling="cls").to(device)

# Optimizer only for the classification head
optimizer = torch.optim.AdamW(model.classifier.parameters(), lr=2e-4)
epochs = 3

# Training loop
for epoch in range(epochs):
    model.train()
    train_preds, train_labels = [], []

    for batch in train_loader:
        # Move batch to device
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        logits, loss = model(input_ids, attention_mask, labels)
        loss.backward()
        optimizer.step()

        preds = torch.argmax(logits, dim=1)
        train_preds.extend(preds.cpu().numpy())
        train_labels.extend(labels.cpu().numpy())

    # Compute training metrics
    acc = accuracy_score(train_labels, train_preds)
    prec = precision_score(train_labels, train_preds, average='macro')
    rec = recall_score(train_labels, train_preds, average='macro')
    f1 = f1_score(train_labels, train_preds, average='macro')
    f1_weighted = f1_score(train_labels, train_preds, average='weighted')

    print(f"\nEpoch {epoch+1}")
    print(f"Train Loss: {loss.item():.4f}")
    print(f"Train Accuracy: {acc:.4f}")
    print(f"Train Precision (macro): {prec:.4f}")
    print(f"Train Recall (macro): {rec:.4f}")
    print(f"Train F1 (macro): {f1:.4f}")
    print(f"Train F1 (weighted): {f1_weighted:.4f}")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1
Train Loss: 0.8547
Train Accuracy: 0.6063
Train Precision (macro): 0.5029
Train Recall (macro): 0.5004
Train F1 (macro): 0.4071
Train F1 (weighted): 0.4852

Epoch 2
Train Loss: 0.7142
Train Accuracy: 0.6227
Train Precision (macro): 0.6247
Train Recall (macro): 0.5181
Train F1 (macro): 0.4335
Train F1 (weighted): 0.5079

Epoch 3
Train Loss: 0.6913
Train Accuracy: 0.6328
Train Precision (macro): 0.6275
Train Recall (macro): 0.5391
Train F1 (macro): 0.4858
Train F1 (weighted): 0.5482


## Testing of model

In [13]:
# Evaluation on the test set
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        logits = model(input_ids, attention_mask)
        preds = torch.argmax(logits, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Compute test metrics
acc = accuracy_score(all_labels, all_preds)
prec = precision_score(all_labels, all_preds, average='macro')
rec = recall_score(all_labels, all_preds, average='macro')
f1 = f1_score(all_labels, all_preds, average='macro')
f1_weighted = f1_score(all_labels, all_preds, average='weighted')

print(f"\n--- Test Results ---")
print(f"Test Accuracy: {acc:.4f}")
print(f"Test Precision (macro): {prec:.4f}")
print(f"Test Recall (macro): {rec:.4f}")
print(f"Test F1 (macro): {f1:.4f}")
print(f"Test F1 (weighted): {f1_weighted:.4f}")


--- Test Results ---
Test Accuracy: 0.7710
Test Precision (macro): 0.7267
Test Recall (macro): 0.5671
Test F1 (macro): 0.5615
Test F1 (weighted): 0.7132
