# XLM-RoBERTa Model for Binary Faux-Hate Detection

## Libraries

In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import XLMRobertaModel, XLMRobertaTokenizer
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

## 1. Getting dataset and preparing tokenizer (xlm-roberta-base)

In [3]:
# Every tweet is truncated/padded to this many tokens by the dataset below.
MAX_LEN = 128

# Task A data: the cells below read the 'Tweet', 'Fake' and 'Hate' columns.
csv_path = "/content/drive/MyDrive/Icon Conference/Data/Cleaned_Task_A.csv"
data = pd.read_csv(csv_path)

# Tokenizer that matches the pretrained checkpoint used by the model.
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## 2. Creating custom dataset and dataloaders

In [4]:
class FauxHateDataset(Dataset):
    """Map-style dataset that tokenizes one tweet per item.

    Each item is built from a single row of ``data`` (columns ``Tweet``,
    ``Fake`` and ``Hate``). The text is encoded on access with
    ``tokenizer.encode_plus`` and truncated/padded to ``max_len`` tokens.
    """

    def __init__(self, data, tokenizer, max_len):
        self.data, self.tokenizer, self.max_len = data, tokenizer, max_len

    def __len__(self):
        """Return the number of rows available."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the encoded tweet and both labels for the row at position ``index``."""
        # Positional lookup, so the frame's own index labels do not matter.
        record = self.data.iloc[index]

        tokens = self.tokenizer.encode_plus(
            record['Tweet'],
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        # The tokenizer returns tensors with a leading batch axis; flatten them
        # so the DataLoader can stack items into (batch, max_len).
        sample = {
            key: tokens[key].flatten()
            for key in ('input_ids', 'attention_mask')
        }
        sample['labels_faux'] = torch.tensor(record['Fake'], dtype=torch.long)
        sample['labels_hate'] = torch.tensor(record['Hate'], dtype=torch.long)
        return sample

In [5]:
# Rows without tweet text cannot be tokenized, so drop them up front.
data = data.dropna(subset=['Tweet'])

# Hold out 20% of the rows for validation (fixed seed for a repeatable split).
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# Wrap each split in the tokenizing dataset.
train_dataset, val_dataset = (
    FauxHateDataset(frame, tokenizer, MAX_LEN)
    for frame in (train_data, val_data)
)

# Only the training loader shuffles; validation keeps a fixed order.
BATCH_SIZE = 32
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)

# Sanity check: pull a single batch and inspect it.
batch = next(iter(train_dataloader))
print(batch)

{'input_ids': tensor([[    0,   873, 18788,  ...,     1,     1,     1],
        [    0,  3036,   350,  ...,     1,     1,     1],
        [    0,  9925,    83,  ...,     1,     1,     1],
        ...,
        [    0,  1184,    13,  ...,     1,     1,     1],
        [    0,  7224, 73601,  ...,     1,     1,     1],
        [    0, 49191,  7525,  ...,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), 'labels_faux': tensor([1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1,
        1, 1, 0, 1, 1, 1, 0, 0]), 'labels_hate': tensor([1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 0, 0, 0])}


## 3. Defining model architecture

In [6]:
import torch
import torch.nn as nn
from transformers import XLMRobertaModel, XLMRobertaTokenizer

class FauxHateDetector(nn.Module):
    """Shared XLM-RoBERTa encoder feeding two independent linear heads.

    Args:
        model_name: Checkpoint name passed to ``XLMRobertaModel.from_pretrained``.
        num_labels_task1: Number of output classes of the 'faux' head.
        num_labels_task2: Number of output classes of the 'hate' head.
    """

    def __init__(self, model_name='xlm-roberta-base', num_labels_task1=2, num_labels_task2=2):
        super().__init__()
        self.model = XLMRobertaModel.from_pretrained(model_name)

        # Both heads read the same encoder width.
        hidden_dim = self.model.config.hidden_size
        self.classifier_hate = nn.Linear(hidden_dim, num_labels_task2)
        self.classifier_faux = nn.Linear(hidden_dim, num_labels_task1)

    def forward(self, input_ids, attention_mask):
        """Return ``(faux_logits, hate_logits)`` for a batch of token ids."""
        encoded = self.model(input_ids=input_ids, attention_mask=attention_mask)

        # Sentence representation: hidden state at the first (CLS) position.
        first_token = encoded.last_hidden_state[:, 0, :]

        return self.classifier_faux(first_token), self.classifier_hate(first_token)


## 4. Fitting the model to data (training)

In [7]:
from transformers import AdamW
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Define training function
def train(model, dataloader, optimizer, criterion, device):
    """Run one training epoch and return the mean combined loss per batch.

    Args:
        model: Module returning ``(faux_logits, hate_logits)`` when called
            with ``(input_ids, attention_mask)``.
        dataloader: Sized iterable of batches holding the keys ``input_ids``,
            ``attention_mask``, ``labels_faux`` and ``labels_hate``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss applied separately to each task's logits.
        device: Device every batch tensor is moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []
    tensor_keys = ('input_ids', 'attention_mask', 'labels_faux', 'labels_hate')

    for batch in dataloader:
        input_ids, attention_mask, labels_faux, labels_hate = (
            batch[key].to(device) for key in tensor_keys
        )

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        faux_logits, hate_logits = model(input_ids, attention_mask)

        # The two task losses are summed with equal weight.
        loss = criterion(faux_logits, labels_faux) + criterion(hate_logits, labels_hate)
        batch_losses.append(loss.item())

        loss.backward()
        optimizer.step()

    return sum(batch_losses) / len(dataloader)


## 5. Making predictions and evaluating a model (inference)

In [8]:
from sklearn.metrics import classification_report

# Define evaluation function
def evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: Module returning ``(faux_logits, hate_logits)`` when called
            with ``(input_ids, attention_mask)``.
        dataloader: Sized iterable of batches holding the keys ``input_ids``,
            ``attention_mask``, ``labels_faux`` and ``labels_hate``.
        criterion: Loss applied separately to each task's logits.
        device: Device every batch tensor is moved to.

    Returns:
        Tuple ``(avg_loss, report_faux, report_hate)``: the summed per-batch
        loss divided by ``len(dataloader)``, followed by one
        ``classification_report`` per task.
    """
    model.eval()
    running_loss = 0
    faux_true, faux_pred = [], []
    hate_true, hate_pred = [], []
    tensor_keys = ('input_ids', 'attention_mask', 'labels_faux', 'labels_hate')

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, labels_faux, labels_hate = (
                batch[key].to(device) for key in tensor_keys
            )

            faux_logits, hate_logits = model(input_ids, attention_mask)

            # Same equally-weighted sum of both task losses as in training.
            batch_loss = criterion(faux_logits, labels_faux) + criterion(hate_logits, labels_hate)
            running_loss += batch_loss.item()

            # Predicted class = index of the largest logit.
            faux_true.extend(labels_faux.cpu().numpy())
            faux_pred.extend(faux_logits.argmax(dim=1).cpu().numpy())

            hate_true.extend(labels_hate.cpu().numpy())
            hate_pred.extend(hate_logits.argmax(dim=1).cpu().numpy())

    avg_loss = running_loss / len(dataloader)
    return (
        avg_loss,
        classification_report(faux_true, faux_pred),
        classification_report(hate_true, hate_pred),
    )


## 6. Training and Testing loop

In [None]:
# Device configuration
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the global RNG *before* the model is built: the two classification
# heads are randomly initialised at construction time, so seeding afterwards
# (as was done previously) leaves their starting weights non-reproducible.
torch.manual_seed(42)

# Model, criterion, optimizer
model = FauxHateDetector().to(device)
criterion = nn.CrossEntropyLoss()
# NOTE(review): transformers.AdamW is deprecated upstream in favour of
# torch.optim.AdamW. If migrating, note that torch's default weight_decay is
# 0.01, whereas the transformers implementation defaults to 0.0.
optimizer = AdamW(params = model.parameters(), lr=2e-5)

# Variables to track the best model
best_val_loss = float('inf')
save_path = "/content/drive/MyDrive/Icon Conference/multitaskXLMRoBERTa/best_model.pth"
# torch.save does not create missing parent folders, so make sure the
# checkpoint directory exists before the first save is attempted.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    train_loss = train(model, train_dataloader, optimizer, criterion, device)
    val_loss, faux_report, hate_report = evaluate(model, val_dataloader, criterion, device)

    # Check if validation loss improved and save the best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), save_path)
        print(f"New best model saved with validation loss: {val_loss:.4f}")

    print(f"Epoch {epoch + 1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f} | Validation Loss: {val_loss:.4f}")
    print("Faux Detection Report:\n", faux_report)
    print("Hate Detection Report:\n", hate_report)



## 7. Model evaluation and visualization

In [None]:
import torch
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

# Load the best model
best_model = FauxHateDetector().to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime can still be restored on a CPU-only one.
best_model.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/Icon Conference/multitaskXLMRoBERTa/best_model.pth",
        map_location=device,
    )
)
# Switch to inference mode.
best_model.eval()

# Evaluate and visualize on the test set
def evaluate_and_visualize_test(model, dataloader, criterion, device):
    """Print loss and per-task reports for ``model`` and plot confusion matrices.

    Args:
        model: Module returning ``(faux_logits, hate_logits)`` when called
            with ``(input_ids, attention_mask)``.
        dataloader: Sized iterable of batches holding the keys ``input_ids``,
            ``attention_mask``, ``labels_faux`` and ``labels_hate``.
        criterion: Loss applied separately to each task's logits.
        device: Device every batch tensor is moved to.

    Returns:
        None. Results are printed and shown as a two-panel figure.
    """
    model.eval()
    faux_true, faux_pred = [], []
    hate_true, hate_pred = [], []
    running_loss = 0
    tensor_keys = ('input_ids', 'attention_mask', 'labels_faux', 'labels_hate')

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, labels_faux, labels_hate = (
                batch[key].to(device) for key in tensor_keys
            )

            faux_logits, hate_logits = model(input_ids, attention_mask)

            # Equally-weighted sum of both task losses.
            batch_loss = criterion(faux_logits, labels_faux) + criterion(hate_logits, labels_hate)
            running_loss += batch_loss.item()

            # Predicted class = index of the largest logit.
            faux_true.extend(labels_faux.cpu().numpy())
            faux_pred.extend(faux_logits.argmax(dim=1).cpu().numpy())
            hate_true.extend(labels_hate.cpu().numpy())
            hate_pred.extend(hate_logits.argmax(dim=1).cpu().numpy())

    avg_loss = running_loss / len(dataloader)
    print(f"Test Loss: {avg_loss:.4f}")

    # Per-task precision/recall/F1 summaries.
    print("Faux Detection Report:\n", classification_report(faux_true, faux_pred))
    print("Hate Detection Report:\n", classification_report(hate_true, hate_pred))

    # One panel per task: (confusion matrix, colour map, title).
    panels = (
        (confusion_matrix(faux_true, faux_pred), 'Blues', "Confusion Matrix for Faux Detection"),
        (confusion_matrix(hate_true, hate_pred), 'Reds', "Confusion Matrix for Hate Detection"),
    )

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    for ax, (matrix, colour_map, title) in zip(axes, panels):
        sns.heatmap(matrix, annot=True, fmt='d', cmap=colour_map, ax=ax)
        ax.set_title(title)
        ax.set_xlabel("Predicted Labels")
        ax.set_ylabel("True Labels")

    plt.show()

# NOTE: this call passes `val_dataloader`, so the validation split is what gets
# evaluated here; swap in a dedicated test dataloader if one is available.
evaluate_and_visualize_test(best_model, val_dataloader, criterion, device)
