In [3]:
!pip install -q transformers datasets scikit-learn torch accelerate


In [4]:
import torch
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from torch.optim import AdamW
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from datasets import load_dataset
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import numpy as np
import os
from tqdm.auto import tqdm


In [6]:
MODEL_NAME = "roberta-base"
BATCH_SIZE = 16
EPOCHS = 3
MAX_LEN = 128
LR = 2e-5
SEED = 42
SAVE_DIR = "/content/drive/MyDrive/HateSpeechDetection"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

torch.manual_seed(SEED)
np.random.seed(SEED)
if device.type == "cuda":
    torch.cuda.manual_seed_all(SEED)


Device: cuda


In [7]:
print(" Loading HateXplain dataset...")

dataset = load_dataset("hate_speech_offensive")  # Hugging Face dataset

# Preview
print(dataset)


 Loading HateXplain dataset...
DatasetDict({
    train: Dataset({
        features: ['count', 'hate_speech_count', 'offensive_language_count', 'neither_count', 'class', 'tweet'],
        num_rows: 24783
    })
})


In [9]:
# Convert to binary labels: 1 = offensive, 0 = non-offensive
def prepare_df(split):
    texts = split["tweet"]
    # Map classes: 0 (hate/offensive) → 1, 2 (neither) → 0
    labels = []
    for c in split["class"]:
        if c == 0:       # offensive/hate
            labels.append(1)
        elif c == 2:     # neither → non-offensive
            labels.append(0)
        else:            # class 1 = ambiguous, skip
            labels.append(-1)
    # Filter out ambiguous rows
    filtered_texts = [t for t, l in zip(texts, labels) if l != -1]
    filtered_labels = [l for l in labels if l != -1]
    return filtered_texts, filtered_labels

train_texts, train_labels = prepare_df(dataset["train"])
print(" Total samples after filtering:", len(train_texts))


 Total samples after filtering: 5593


In [10]:
from sklearn.model_selection import train_test_split

train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.1, random_state=42, stratify=train_labels
)

val_texts, test_texts, val_labels, test_labels = train_test_split(
    val_texts, val_labels, test_size=0.5, random_state=42, stratify=val_labels
)

print("Train:", len(train_texts), "| Val:", len(val_texts), "| Test:", len(test_texts))


Train: 5033 | Val: 280 | Test: 280


In [11]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def encode_texts(texts):
    enc = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=MAX_LEN,
        return_tensors="pt"
    )
    return enc["input_ids"], enc["attention_mask"]

# Convert to TensorDataset
def make_dataset(texts, labels):
    input_ids, attention_mask = encode_texts(texts)
    labels = torch.tensor(labels)
    return TensorDataset(input_ids, attention_mask, labels)

train_ds = make_dataset(train_texts, train_labels)
val_ds = make_dataset(val_texts, val_labels)
test_ds = make_dataset(test_texts, test_labels)

train_loader = DataLoader(train_ds, sampler=RandomSampler(train_ds), batch_size=BATCH_SIZE)
val_loader = DataLoader(val_ds, sampler=SequentialSampler(val_ds), batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, sampler=SequentialSampler(test_ds), batch_size=BATCH_SIZE)


In [12]:
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
model.to(device)

optimizer = AdamW(model.parameters(), lr=LR)
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
def train_epoch(model, loader):
    model.train()
    total_loss = 0
    for batch in tqdm(loader, desc="Training"):
        input_ids, attn, labels = [x.to(device) for x in batch]
        model.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attn, labels=labels)
        loss = outputs.loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()
    return total_loss / len(loader)

@torch.no_grad()
def eval_model(model, loader):
    model.eval()
    preds, true = [], []
    total_loss = 0
    for batch in tqdm(loader, desc="Evaluating"):
        input_ids, attn, labels = [x.to(device) for x in batch]
        outputs = model(input_ids=input_ids, attention_mask=attn, labels=labels)
        total_loss += outputs.loss.item()
        preds.extend(torch.argmax(outputs.logits, dim=1).cpu().numpy())
        true.extend(labels.cpu().numpy())
    acc = accuracy_score(true, preds)
    return total_loss/len(loader), acc, preds, true


In [14]:
best_acc = 0
for epoch in range(1, EPOCHS+1):
    print(f"\n===== Epoch {epoch}/{EPOCHS} =====")
    train_loss = train_epoch(model, train_loader)
    val_loss, val_acc, _, _ = eval_model(model, val_loader)
    print(f"Train Loss={train_loss:.4f} | Val Loss={val_loss:.4f} | Val Acc={val_acc:.4f}")

    if val_acc > best_acc:
        best_acc = val_acc
        os.makedirs(SAVE_DIR, exist_ok=True)
        model.save_pretrained(SAVE_DIR)
        tokenizer.save_pretrained(SAVE_DIR)
        print(f" Best model saved to {SAVE_DIR}")



===== Epoch 1/3 =====


Training:   0%|          | 0/315 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

Train Loss=0.2835 | Val Loss=0.3033 | Val Acc=0.8964
 Best model saved to /content/drive/MyDrive/HateSpeechDetection

===== Epoch 2/3 =====


Training:   0%|          | 0/315 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

Train Loss=0.1485 | Val Loss=0.2832 | Val Acc=0.9357
 Best model saved to /content/drive/MyDrive/HateSpeechDetection

===== Epoch 3/3 =====


Training:   0%|          | 0/315 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

Train Loss=0.0931 | Val Loss=0.3361 | Val Acc=0.9321


In [16]:
print("\n Evaluating on Test Set: ")
test_loss, test_acc, preds, true = eval_model(model, test_loader)
print(f" Test Accuracy: {test_acc:.4f}")

from sklearn.metrics import classification_report, confusion_matrix
print("\n Classification Report: ")
print(classification_report(true, preds, target_names=["Offensive", "Non-Offensive"]))

print("\n Confusion Matrix:")
print(confusion_matrix(true, preds))




 Evaluating on Test Set: 


Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

 Test Accuracy: 0.9357

 Classification Report: 
               precision    recall  f1-score   support

    Offensive       0.96      0.96      0.96       209
Non-Offensive       0.87      0.87      0.87        71

     accuracy                           0.94       280
    macro avg       0.92      0.92      0.92       280
 weighted avg       0.94      0.94      0.94       280


 Confusion Matrix:
[[200   9]
 [  9  62]]


In [22]:
from google.colab import drive
import os

# Use a different folder as mount point
drive.mount('/content/my_drive')

# Set save path in Drive
SAVE_DIR = "/content/my_drive/MyDrive/HateSpeechDetection"  # adjust MyDrive if needed
os.makedirs(SAVE_DIR, exist_ok=True)

# Save model
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

print(f" Model saved to {SAVE_DIR}")


Mounted at /content/my_drive
 Model saved to /content/my_drive/MyDrive/HateSpeechDetection
