In [15]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import classification_report
from tqdm import tqdm

In [16]:
df_train1 = pd.read_csv("data/latenthatred/latent_train.tsv", sep='\t')
df_test1 = pd.read_csv("data/latenthatred/latent_test.tsv", sep='\t')
df_train1['class'].value_counts().sum()+ df_test1['class'].value_counts().sum()
df_train1

Unnamed: 0,post,class
0,such racism goy there is only shared h...,1
1,rt and the color of facist fiction is white,1
2,girl sure you can wait must coz your allah...,0
3,who are their supporters black people or white...,0
4,you re wrong i m white and my kids are white,0
...,...,...
14268,not only secure the border but restrict all i...,1
14269,the white genocide continues white race now ...,1
14270,so dan carlin is uninformed about the nordics ...,1
14271,by western men i mean white western men don...,0


In [17]:
import os
import pandas as pd
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from typing import List, Dict, Tuple

class HateSpeechEntailmentDataset(Dataset):
    def __init__(
        self,
        data_path: str,
        tokenizer: AutoTokenizer,
        label_prompts: Dict[int, str],
        max_length: int = 128
    ):
        """
        Dataset class for entailment-style hate speech detection
        
        Args:
            data_path: Path to TSV file with 'post' and 'class' columns
            tokenizer: HuggingFace tokenizer
            label_prompts: Dictionary mapping labels to prompt text
            max_length: Maximum sequence length
        """
        # Read TSV file
        print(f"Loading data from {data_path}")
        df = pd.read_csv(data_path, sep='\t')
        self.texts = df['post'].astype(str).tolist()
        self.labels = df['class'].astype(int).tolist()
        print(f"Loaded {len(self.texts)} examples")
        
        self.tokenizer = tokenizer
        self.label_prompts = label_prompts
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        # Create positive and negative pairs
        pos_prompt = self.label_prompts[label]
        neg_prompt = self.label_prompts[1 - label]
        
        # Tokenize positive pair
        pos_encoding = self.tokenizer(
            text,
            pos_prompt,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        # Tokenize negative pair
        neg_encoding = self.tokenizer(
            text,
            neg_prompt, 
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'pos_input_ids': pos_encoding['input_ids'].squeeze(),
            'pos_attention_mask': pos_encoding['attention_mask'].squeeze(),
            'neg_input_ids': neg_encoding['input_ids'].squeeze(),
            'neg_attention_mask': neg_encoding['attention_mask'].squeeze(),
            'label': torch.tensor(1)  # Positive pair should entail
        }

class HateSpeechEntailmentModel(nn.Module):
    def __init__(
        self,
        model_name: str = 'cardiffnlp/twitter-xlm-roberta-base-sentiment',
        num_labels: int = 2
    ):
        """
        Model class for entailment-style hate speech detection
        """
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels
        )
        
    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor
    ) -> torch.Tensor:
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        return outputs.logits

# Add this to your imports
from tqdm import tqdm

def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
    num_epochs: int = 5,
    model_save_path: str = None
) -> None:
    """Train model with validation monitoring"""
    criterion = nn.CrossEntropyLoss()
    best_val_f1 = 0
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        total_loss = 0
        
        # Create progress bar for training
        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        
        for batch in train_pbar:
            pos_input_ids = batch['pos_input_ids'].to(device)
            pos_attention_mask = batch['pos_attention_mask'].to(device)
            neg_input_ids = batch['neg_input_ids'].to(device)
            neg_attention_mask = batch['neg_attention_mask'].to(device)
            
            optimizer.zero_grad()
            
            # Forward pass for both positive and negative pairs
            pos_logits = model(pos_input_ids, pos_attention_mask)
            neg_logits = model(neg_input_ids, neg_attention_mask)
            
            # Calculate loss
            pos_labels = torch.ones(pos_logits.size(0), dtype=torch.long).to(device)
            neg_labels = torch.zeros(neg_logits.size(0), dtype=torch.long).to(device)
            
            pos_loss = criterion(pos_logits, pos_labels)
            neg_loss = criterion(neg_logits, neg_labels)
            loss = pos_loss + neg_loss
            
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
            # Update progress bar
            train_pbar.set_postfix({'loss': f'{loss.item():.4f}'})
        
        avg_loss = total_loss / len(train_loader)
        
        # Validation
        print("\nRunning validation...")
        val_acc, val_f1 = evaluate(model, val_loader, device)
        
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print(f"Average Loss: {avg_loss:.4f}")
        print(f"Validation Accuracy: {val_acc:.4f}")
        print(f"Validation F1: {val_f1:.4f}")
        
        # Save best model
        if model_save_path and val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), model_save_path)
            print(f"Saved best model with F1: {val_f1:.4f}")
        print("-" * 50)

def evaluate(
    model: nn.Module,
    dataloader: DataLoader,
    device: torch.device
) -> Tuple[float, float]:
    """Evaluate model"""
    model.eval()
    predictions = []
    actual = []
    
    # Add progress bar for evaluation
    eval_pbar = tqdm(dataloader, desc='Evaluating')
    
    with torch.no_grad():
        for batch in eval_pbar:
            pos_input_ids = batch['pos_input_ids'].to(device)
            pos_attention_mask = batch['pos_attention_mask'].to(device)
            
            logits = model(pos_input_ids, pos_attention_mask)
            preds = torch.argmax(logits, dim=1)
            
            predictions.extend(preds.cpu().numpy())
            actual.extend(batch['label'].cpu().numpy())
    
    # Calculate metrics
    accuracy = sum(p == a for p, a in zip(predictions, actual)) / len(actual)
    
    # Calculate F1 score
    tp = sum((p == 1 and a == 1) for p, a in zip(predictions, actual))
    fp = sum((p == 1 and a == 0) for p, a in zip(predictions, actual))
    fn = sum((p == 0 and a == 1) for p, a in zip(predictions, actual))
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    
    return accuracy, f1

In [18]:
#main for latent hatred dataset
def main():
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # Define paths
    train_path = "data/latenthatred/latent_train.tsv"
    test_path = "data/latenthatred/latent_test.tsv"
    model_save_path = "saved_models/best_model.pth"
    
    # Create directory for saved models if it doesn't exist
    os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
    
    # Define label prompts
    label_prompts = {
        0: "this post contains normal words",
        1: "this post contains hate speech"
    }
    
    # Initialize tokenizer and model
    print("Initializing tokenizer and model...")
    model_name = 'bert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = HateSpeechEntailmentModel(model_name).to(device)
    
    # Create datasets
    print("Loading training data...")
    train_dataset = HateSpeechEntailmentDataset(
        data_path=train_path,
        tokenizer=tokenizer,
        label_prompts=label_prompts
    )
    
    print("Loading test data...")
    test_dataset = HateSpeechEntailmentDataset(
        data_path=test_path,
        tokenizer=tokenizer,
        label_prompts=label_prompts
    )
    
    # Create dataloaders
    print("Creating dataloaders...")
    train_loader = DataLoader(
        train_dataset,
        batch_size=16,
        shuffle=True
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=16,
        shuffle=False
    )
    
    # Initialize optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
    
    print("Starting training...")
    # Train model
    train_model(
        model,
        train_loader,
        test_loader,
        optimizer,
        device,
        num_epochs=3,
        model_save_path=model_save_path
    )
    
    # Load best model and evaluate on test set
    print("\nEvaluating best model...")
    model.load_state_dict(torch.load(model_save_path))
    test_acc, test_f1 = evaluate(model, test_loader, device)
    print(f"\nFinal Test Results:")
    print(f"Accuracy: {test_acc:.4f}")
    print(f"F1 Score: {test_f1:.4f}")
if __name__ == "__main__":
    main()

Using device: cuda
Initializing tokenizer and model...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading training data...
Loading data from data/latenthatred/latent_train.tsv
Loaded 14273 examples
Loading test data...
Loading data from data/latenthatred/latent_test.tsv
Loaded 4079 examples
Creating dataloaders...
Starting training...


Epoch 1/3 [Train]: 100%|██████████| 893/893 [05:35<00:00,  2.66it/s, loss=1.5136]



Running validation...


Evaluating: 100%|██████████| 255/255 [00:16<00:00, 15.14it/s]



Epoch 1/3
Average Loss: 1.1041
Validation Accuracy: 0.8022
Validation F1: 0.8902
Saved best model with F1: 0.8902
--------------------------------------------------


Epoch 2/3 [Train]: 100%|██████████| 893/893 [05:35<00:00,  2.66it/s, loss=0.9499]



Running validation...


Evaluating: 100%|██████████| 255/255 [00:16<00:00, 15.13it/s]



Epoch 2/3
Average Loss: 0.8393
Validation Accuracy: 0.7499
Validation F1: 0.8571
--------------------------------------------------


Epoch 3/3 [Train]: 100%|██████████| 893/893 [05:35<00:00,  2.66it/s, loss=0.0129]



Running validation...


Evaluating: 100%|██████████| 255/255 [00:16<00:00, 15.13it/s]



Epoch 3/3
Average Loss: 0.5427
Validation Accuracy: 0.7566
Validation F1: 0.8614
--------------------------------------------------

Evaluating best model...


Evaluating: 100%|██████████| 255/255 [00:16<00:00, 15.13it/s]



Final Test Results:
Accuracy: 0.8022
F1 Score: 0.8902


In [24]:
import os
import pandas as pd
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from typing import List, Dict, Tuple
from tqdm import tqdm

class HateSpeechEntailmentDataset(Dataset):
    def __init__(
        self,
        data_path: str,
        tokenizer: AutoTokenizer,
        label_prompts: Dict[int, str],
        label_strategy: str = 'hate_vs_nonhate',  # or 'hate_offensive_vs_normal'
        max_length: int = 128
    ):
        """
        Dataset class for entailment-style hate speech detection
        
        Args:
            data_path: Path to TSV file with 'post' and 'class' columns
            tokenizer: HuggingFace tokenizer
            label_prompts: Dictionary mapping labels to prompt text
            label_strategy: How to combine labels - either 'hate_vs_nonhate' or 'hate_offensive_vs_normal'
            max_length: Maximum sequence length
        """
        # Read TSV file
        print(f"Loading data from {data_path}")
        df = pd.read_csv(data_path, sep='\t')
        self.texts = df['post'].astype(str).tolist()
        original_labels = df['label'].astype(int).tolist()
        
        # Convert 3-way labels to binary based on strategy
        if label_strategy == 'hate_vs_nonhate':
            # 2 (hate) -> 1, 1 (offensive) & 0 (normal) -> 0
            self.labels = [1 if label == 2 else 0 for label in original_labels]
        else:  # hate_offensive_vs_normal
            # 2 (hate) & 1 (offensive) -> 1, 0 (normal) -> 0
            self.labels = [0 if label == 0 else 1 for label in original_labels]
            
        print(f"Loaded {len(self.texts)} examples")
        print(f"Label distribution after conversion: {pd.Series(self.labels).value_counts()}")
        
        self.tokenizer = tokenizer
        self.label_prompts = label_prompts
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        # Create positive and negative pairs
        pos_prompt = self.label_prompts[label]
        neg_prompt = self.label_prompts[1 - label]
        
        # Tokenize positive pair
        pos_encoding = self.tokenizer(
            text,
            pos_prompt,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        # Tokenize negative pair
        neg_encoding = self.tokenizer(
            text,
            neg_prompt, 
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'pos_input_ids': pos_encoding['input_ids'].squeeze(),
            'pos_attention_mask': pos_encoding['attention_mask'].squeeze(),
            'neg_input_ids': neg_encoding['input_ids'].squeeze(),
            'neg_attention_mask': neg_encoding['attention_mask'].squeeze(),
            'label': torch.tensor(1)  # Positive pair should entail
        }

class HateSpeechEntailmentModel(nn.Module):
    def __init__(
        self,
        model_name: str = 'bert-base-uncased',
        num_labels: int = 2
    ):
        """
        Model class for entailment-style hate speech detection
        """
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels
        )
        
    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor
    ) -> torch.Tensor:
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        return outputs.logits

def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
    num_epochs: int = 3,
    model_save_path: str = None
) -> None:
    """Train model with validation monitoring"""
    criterion = nn.CrossEntropyLoss()
    best_val_f1 = 0
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        total_loss = 0
        
        # Create progress bar for training
        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        
        for batch in train_pbar:
            pos_input_ids = batch['pos_input_ids'].to(device)
            pos_attention_mask = batch['pos_attention_mask'].to(device)
            neg_input_ids = batch['neg_input_ids'].to(device)
            neg_attention_mask = batch['neg_attention_mask'].to(device)
            
            optimizer.zero_grad()
            
            # Forward pass for both positive and negative pairs
            pos_logits = model(pos_input_ids, pos_attention_mask)
            neg_logits = model(neg_input_ids, neg_attention_mask)
            
            # Calculate loss
            pos_labels = torch.ones(pos_logits.size(0), dtype=torch.long).to(device)
            neg_labels = torch.zeros(neg_logits.size(0), dtype=torch.long).to(device)
            
            pos_loss = criterion(pos_logits, pos_labels)
            neg_loss = criterion(neg_logits, neg_labels)
            loss = pos_loss + neg_loss
            
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
            # Update progress bar
            train_pbar.set_postfix({'loss': f'{loss.item():.4f}'})
        
        avg_loss = total_loss / len(train_loader)
        
        # Validation
        print("\nRunning validation...")
        val_acc, val_f1 = evaluate(model, val_loader, device)
        
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print(f"Average Loss: {avg_loss:.4f}")
        print(f"Validation Accuracy: {val_acc:.4f}")
        print(f"Validation F1: {val_f1:.4f}")
        
        # Save best model
        if model_save_path and val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), model_save_path)
            print(f"Saved best model with F1: {val_f1:.4f}")
        print("-" * 50)

def evaluate(
    model: nn.Module,
    dataloader: DataLoader,
    device: torch.device
) -> Tuple[float, float]:
    """Evaluate model"""
    model.eval()
    predictions = []
    actual = []
    
    # Add progress bar for evaluation
    eval_pbar = tqdm(dataloader, desc='Evaluating')
    
    with torch.no_grad():
        for batch in eval_pbar:
            pos_input_ids = batch['pos_input_ids'].to(device)
            pos_attention_mask = batch['pos_attention_mask'].to(device)
            
            logits = model(pos_input_ids, pos_attention_mask)
            preds = torch.argmax(logits, dim=1)
            
            predictions.extend(preds.cpu().numpy())
            actual.extend(batch['label'].cpu().numpy())
    
    # Calculate metrics
    accuracy = sum(p == a for p, a in zip(predictions, actual)) / len(actual)
    
    # Calculate F1 score
    tp = sum((p == 1 and a == 1) for p, a in zip(predictions, actual))
    fp = sum((p == 1 and a == 0) for p, a in zip(predictions, actual))
    fn = sum((p == 0 and a == 1) for p, a in zip(predictions, actual))
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    
    return accuracy, f1

In [25]:
def main():
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # Define paths
    train_path = "data/hatexplain/hx_train.tsv"  # Update with your paths
    test_path = "data/hatexplain/hx_test.tsv"
    model_save_path = "saved_models/best_model.pth"
    
    # Create directory for saved models if it doesn't exist
    os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
    
    # Choose your label strategy
    label_strategy = 'hate_vs_nonhate'  # or 'hate_offensive_vs_normal'
    
    # Define label prompts based on strategy
    if label_strategy == 'hate_vs_nonhate':
        label_prompts = {
            0: "this post contains non-hateful content",  # normal + offensive
            1: "this post contains hate speech"  # hate
        }
    else:  # hate_offensive_vs_normal
        label_prompts = {
            0: "this post contains normal content",  # normal
            1: "this post contains harmful content"  # hate + offensive
        }
    
    # Initialize tokenizer and model
    print("Initializing tokenizer and model...")
    model_name = 'bert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = HateSpeechEntailmentModel(model_name).to(device)
    
    # Create datasets
    print("Loading training data...")
    train_dataset = HateSpeechEntailmentDataset(
        data_path=train_path,
        tokenizer=tokenizer,
        label_prompts=label_prompts,
        label_strategy=label_strategy
    )
    
    print("Loading test data...")
    test_dataset = HateSpeechEntailmentDataset(
        data_path=test_path,
        tokenizer=tokenizer,
        label_prompts=label_prompts,
        label_strategy=label_strategy
    )
    
    # Create dataloaders
    print("Creating dataloaders...")
    train_loader = DataLoader(
        train_dataset,
        batch_size=16,
        shuffle=True
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=16,
        shuffle=False
    )
    
    # Initialize optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
    
    print("Starting training...")
    # Train model
    train_model(
        model,
        train_loader,
        test_loader,
        optimizer,
        device,
        num_epochs=3,
        model_save_path=model_save_path
    )
    
    # Load best model and evaluate on test set
    print("\nEvaluating best model...")
    model.load_state_dict(torch.load(model_save_path))
    test_acc, test_f1 = evaluate(model, test_loader, device)
    print(f"\nFinal Test Results:")
    print(f"Accuracy: {test_acc:.4f}")
    print(f"F1 Score: {test_f1:.4f}")

if __name__ == "__main__":
    main()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using device: cuda
Initializing tokenizer and model...
Loading training data...
Loading data from data/hatexplain/hx_train.tsv
Loaded 15383 examples
Label distribution after conversion: 1    9132
0    6251
dtype: int64
Loading test data...
Loading data from data/hatexplain/hx_test.tsv
Loaded 1924 examples
Label distribution after conversion: 1    1142
0     782
dtype: int64
Creating dataloaders...
Starting training...


Epoch 1/3 [Train]: 100%|██████████| 962/962 [06:01<00:00,  2.66it/s, loss=0.4863]



Running validation...


Evaluating: 100%|██████████| 121/121 [00:08<00:00, 15.12it/s]



Epoch 1/3
Average Loss: 1.0265
Validation Accuracy: 0.7729
Validation F1: 0.8719
Saved best model with F1: 0.8719
--------------------------------------------------


Epoch 2/3 [Train]: 100%|██████████| 962/962 [06:01<00:00,  2.66it/s, loss=1.2852]



Running validation...


Evaluating: 100%|██████████| 121/121 [00:07<00:00, 15.16it/s]



Epoch 2/3
Average Loss: 0.7533
Validation Accuracy: 0.7994
Validation F1: 0.8885
Saved best model with F1: 0.8885
--------------------------------------------------


Epoch 3/3 [Train]: 100%|██████████| 962/962 [06:01<00:00,  2.66it/s, loss=0.0783]



Running validation...


Evaluating: 100%|██████████| 121/121 [00:08<00:00, 15.12it/s]



Epoch 3/3
Average Loss: 0.4304
Validation Accuracy: 0.7911
Validation F1: 0.8833
--------------------------------------------------

Evaluating best model...


Evaluating: 100%|██████████| 121/121 [00:08<00:00, 15.11it/s]


Final Test Results:
Accuracy: 0.7994
F1 Score: 0.8885



