**Project**

Loading the dataset and counting the labels:

In [None]:
import pandas as pd

# Read the full labelled training file into memory
df = pd.read_csv(
    "/content/train.csv",
)

# Quick look at the size and the first rows
print("Shape:", df.shape)
df.head()


In [None]:
# The six binary target columns of the multi-label task
label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

# Positive examples per label, rarest first
label_counts = df.loc[:, label_cols].sum(axis=0).sort_values()

print("Label distribution:\n")
print(label_counts)


Split:

In [None]:
from sklearn.model_selection import train_test_split

label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

# Helper flag used only for stratification: 1 when at least one label is set
df["has_toxicity"] = df[label_cols].sum(axis=1).gt(0).astype(int)

# 80/20 split that keeps the toxic / non-toxic ratio equal in both parts
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["has_toxicity"],
)

# The flag is not a model input, so remove it from both splits again
train_df = train_df.drop(columns=["has_toxicity"])
val_df = val_df.drop(columns=["has_toxicity"])

print("Train shape:", train_df.shape)
print("Validation shape:", val_df.shape)


Cleaning Data:

In [None]:
import re

def clean_text(text):
    """Normalise a raw comment before tokenization.

    The input is coerced to ``str`` and then passed through these
    substitutions, in order:

    1. remove URLs (anything starting with ``http``/``https``/``www``),
    2. remove ``@mentions``,
    3. shorten any character repeated four or more times to two,
    4. collapse whitespace runs to a single space.

    Returns the result with leading/trailing whitespace stripped.
    """
    substitutions = (
        (r"http\S+|www\S+|https\S+", ""),
        (r"@\w+", ""),
        (r"(.)\1{3,}", r"\1\1"),
        (r"\s+", " "),
    )

    cleaned = str(text)
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Clean both splits with the same function so train and validation text match
train_df["comment_text"] = train_df["comment_text"].map(clean_text)
val_df["comment_text"] = val_df["comment_text"].map(clean_text)


Dataset Switch:

In [None]:
# TRAINING MODE SWITCH
#
# Chooses the frame that is exposed as `training_df`:
#   "original"  -> the cleaned training split only
#   "augmented" -> the training split plus the paraphrased samples, shuffled

TRAIN_MODE = "original"   # change to "augmented" when needed

if TRAIN_MODE == "original":
    training_df = train_df
    print("Training on ORIGINAL data only")

elif TRAIN_MODE == "augmented":
    # `aug_df` is only created further down, in the Augmentation section, so on
    # a fresh kernel it does not exist yet when this cell runs top-to-bottom.
    # Fall back to the CSV that section writes instead of dying with NameError.
    if "aug_df" not in globals():
        aug_df = pd.read_csv("/content/augmented_train.csv")

    training_df = pd.concat([train_df, aug_df], ignore_index=True)
    # Shuffle so the appended paraphrases are not clustered at the end
    training_df = training_df.sample(frac=1, random_state=42).reset_index(drop=True)
    print("Training on ORIGINAL + AUGMENTED data")

else:
    raise ValueError("TRAIN_MODE must be 'original' or 'augmented'")

print("Training samples:", len(training_df))
print("Validation samples:", len(val_df))


BERT Tokenization:

In [None]:
!pip install transformers

import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer


In [None]:
# Tokenizer shared by every dataset built in this notebook
tokenizer = BertTokenizer.from_pretrained(
    "bert-base-uncased",
)


In [None]:
class ToxicDataset(Dataset):
    """Torch dataset that tokenizes comments lazily, one item at a time.

    Args:
        dataframe: frame with a ``comment_text`` column and one column per
            entry of the module-level ``label_cols`` list.
        tokenizer: Hugging Face tokenizer; it is invoked through ``__call__``.
        max_len: every sample is padded / truncated to exactly this many tokens.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (the
    tokenizer output with its leading batch dimension removed) and ``labels``
    (float tensor built from the label columns of that row).
    """

    def __init__(self, dataframe, tokenizer, max_len=128):
        self.texts = dataframe["comment_text"].values
        self.labels = dataframe[label_cols].values
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        # Float targets, as required by BCE-with-logits style losses
        labels = torch.tensor(self.labels[idx], dtype=torch.float)

        # Call the tokenizer directly: `encode_plus` is deprecated in
        # transformers, and the other ToxicDataset in this notebook already
        # uses `__call__`. The keyword arguments are unchanged.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt"
        )

        return {
            # return_tensors="pt" adds a batch axis of size 1; drop it so the
            # DataLoader can stack samples itself
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": labels
        }


In [None]:
# Wrap the selected training frame and the validation split for PyTorch
train_dataset = ToxicDataset(
    training_df,
    tokenizer,
)
val_dataset = ToxicDataset(
    val_df,
    tokenizer,
)


In [None]:
BATCH_SIZE = 16

# Options shared by both loaders; only the shuffling differs
_loader_opts = dict(batch_size=BATCH_SIZE, num_workers=2, pin_memory=True)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_opts)

print("Train samples:", len(train_loader.dataset))
print("Val samples:", len(val_loader.dataset))


Sanity Check:

In [None]:
# Pull a single batch and print the shape of each tensor it contains
batch = next(iter(train_loader))

for caption, key in (
    ("Input IDs shape:", "input_ids"),
    ("Attention mask shape:", "attention_mask"),
    ("Labels shape:", "labels"),
):
    print(caption, batch[key].shape)


BERT Model (no augmentation)

In [None]:
import torch.nn as nn
from transformers import BertModel

class BertToxicClassifier(nn.Module):
    """BERT encoder followed by dropout and one linear layer.

    Args:
        n_labels: size of the output layer (one logit per label).

    ``forward`` returns the raw logits tensor; no sigmoid is applied here.
    """

    def __init__(self, n_labels):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(0.3)
        # Input width is read from the encoder config rather than hard-coded
        self.classifier = nn.Linear(self.bert.config.hidden_size, n_labels)

    def forward(self, input_ids, attention_mask):
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # pooler_output is the encoder's pooled [CLS] representation
        sentence_vec = self.dropout(encoded.pooler_output)
        return self.classifier(sentence_vec)


In [None]:
# Prefer the GPU when one is visible, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# One output logit per label column
model = BertToxicClassifier(n_labels=len(label_cols)).to(device)

print("Using device:", device)




---------



**Augmentation**

Minority class sample extraction:

In [None]:
# Positive counts per label inside the training split only, rarest first
label_counts = train_df.loc[:, label_cols].sum(axis=0).sort_values()
print(label_counts)


threat             380
identity_hate     1146
severe_toxic      1290
insult            6301
obscene           6787
toxic            12248
dtype: int64


In [None]:
# Labels selected for augmentation
minority_labels = ["threat", "identity_hate"]

# Keep every training row that carries at least one of those labels
is_minority = train_df[minority_labels].any(axis=1)
minority_df = train_df.loc[is_minority].copy()

print("Minority samples count:", len(minority_df))
minority_df.head()


In [None]:
# Row count of minority_df (the same value the previous cell prints)
len(minority_df)

Model for Paraphrasing Implementation:

In [None]:
!pip install openai tqdm

import pandas as pd
from tqdm import tqdm
from openai import OpenAI
import time


In [None]:
import os

# Read the key from the environment so it is never typed into (and saved
# with) the notebook. Set OPENAI_API_KEY before running this cell; a missing
# variable fails here with KeyError instead of later, on every API call.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


Paraphrasing Function:

In [None]:
def _parse_numbered_list(output):
    """Return the items of a numbered list ("1. foo" or "2) bar") found in `output`."""
    items = []
    for line in (output or "").splitlines():
        # A list item is: digits, then "." or ")", then the text itself
        match = re.match(r"\s*\d+\s*[.)]\s*(.+)", line)
        if match is None:
            continue
        item = match.group(1).strip()
        if item:
            items.append(item)
    return items


def generate_paraphrases(text, n=2):
    """Ask the chat model for `n` paraphrases of `text`.

    Args:
        text: the comment to paraphrase; it is inserted verbatim in the prompt.
        n: number of paraphrases requested and the maximum number returned.

    Returns:
        A list with at most `n` paraphrase strings. The list is empty when the
        API call fails (the error is printed and the function sleeps two
        seconds before returning) or when the reply holds no numbered items.
    """
    prompt = f"""
Paraphrase the following toxic online comment.
Keep the meaning and level of offensiveness the same.
Return {n} different paraphrased versions as a numbered list.

Comment: "{text}"
"""

    # Best effort: one failed request must not abort the whole augmentation loop
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            service_tier="priority"
        )
        output = response.choices[0].message.content
    except Exception as e:
        print("Error:", e)
        time.sleep(2)
        return []

    # Parsing happens outside the try block and never indexes blindly, so a
    # line such as "2) text" or "3 apples" can no longer raise and throw away
    # the paraphrases that were parsed correctly.
    return _parse_numbered_list(output)[:n]


API Calls:

In [None]:
augmented_rows = []

# One request per minority comment. Every paraphrase becomes a new row that is
# a copy of its source row with only `comment_text` replaced, so the label
# columns are inherited unchanged.
for _, row in tqdm(minority_df.iterrows(), total=len(minority_df)):
    for para in generate_paraphrases(row["comment_text"], n=2):
        new_row = row.copy()
        new_row["comment_text"] = para
        augmented_rows.append(new_row)

# Passing the columns explicitly keeps the schema intact even when no
# paraphrase came back at all (an empty list would give a column-less frame).
augmented_df = pd.DataFrame(augmented_rows, columns=minority_df.columns)
print("Generated paraphrases:", len(augmented_df))


100%|██████████| 1442/1442 [27:50<00:00,  1.16s/it]

Generated paraphrases: 2271





Merging with Training Data:

In [None]:
# Materialise the collected rows and persist them, so the paid API calls do
# not have to be repeated
aug_df = pd.DataFrame(
    augmented_rows,
)
aug_df.to_csv(
    "augmented_train.csv",
    index=False,
)

print("Saved!", len(aug_df), "augmented samples")


In [None]:
# Reload the paraphrases from disk
aug_df = pd.read_csv(
    "/content/augmented_train.csv",
)

# Append them to the original training split with a fresh 0..n-1 index
full_train_df = pd.concat(
    [train_df, aug_df],
    ignore_index=True,
)

print("Original train size:", len(train_df))
print("Augmented train size:", len(full_train_df))


Class-Weighted Version:


P.S. Run this again after augmentation and merging

In [None]:
import numpy as np  # imported here because the notebook's numpy import sits in a later cell

# Positive examples per label. `label_cols` is the module-level column list;
# `label_names` only exists as a local inside train_model.
label_counts = training_df[label_cols].sum().values

# Counts and total must come from the same frame, otherwise the ratio is
# wrong as soon as training_df contains augmented rows.
total_samples = len(training_df)

# Per-label ratio of negatives to positives
pos_weights = (total_samples - label_counts) / label_counts

# Prevent exploding weights
pos_weights = np.clip(pos_weights, 1, 20)

pos_weights = torch.tensor(pos_weights, dtype=torch.float).to(device)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weights)


Training & Validation Function:

In [None]:
from tqdm import tqdm
from sklearn.metrics import f1_score
from sklearn.metrics import f1_score, classification_report
import numpy as np
import torch

def _extract_logits(outputs):
    """Return the logits tensor from a model output object, a tuple, or a bare tensor."""
    if hasattr(outputs, "logits"):
        return outputs.logits
    if isinstance(outputs, (tuple, list)):
        return outputs[0]
    return outputs


def _forward_batch(model, batch, criterion, device):
    """Move one batch to `device`, run the model and return (logits, labels, loss)."""
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)
    labels = batch["labels"].to(device)

    logits = _extract_logits(model(input_ids, attention_mask=attention_mask))
    return logits, labels, criterion(logits, labels)


def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=3):
    """Train `model` for `epochs` epochs, validating after each one.

    Works with models whose forward pass returns an object exposing
    ``.logits`` as well as with models that return the logits tensor directly
    (such as ``BertToxicClassifier`` in this notebook).

    Args:
        model: module called as ``model(input_ids, attention_mask=...)``.
        train_loader, val_loader: iterables of dict batches holding
            ``input_ids``, ``attention_mask`` and ``labels``.
        optimizer: optimizer over the model parameters.
        criterion: loss called as ``criterion(logits, labels)``.
        device: device the batches are moved to.
        epochs: number of passes over ``train_loader``.

    Returns:
        A list with one dict per epoch, holding ``epoch``, ``train_loss``,
        ``val_loss``, ``macro_f1`` and ``micro_f1``. The same numbers and a
        per-class report are printed as before.
    """
    # Constant across epochs, so it is built once
    label_names = ['toxic','severe_toxic','obscene','threat','insult','identity_hate']
    history = []

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        # Training
        model.train()
        total_train_loss = 0

        train_bar = tqdm(train_loader, desc="Training", leave=False)

        for batch in train_bar:
            optimizer.zero_grad()
            _, _, loss = _forward_batch(model, batch, criterion, device)
            loss.backward()
            # Cap the gradient norm at 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            total_train_loss += loss.item()
            train_bar.set_postfix(loss=loss.item())

        avg_train_loss = total_train_loss / len(train_loader)
        print("Train Loss:", round(avg_train_loss, 4))

        # Validation
        model.eval()
        total_val_loss = 0
        all_preds, all_labels = [], []

        val_bar = tqdm(val_loader, desc="Validating", leave=False)

        with torch.no_grad():
            for batch in val_bar:
                logits, labels, loss = _forward_batch(model, batch, criterion, device)
                total_val_loss += loss.item()

                all_preds.append(torch.sigmoid(logits).cpu().numpy())
                all_labels.append(labels.cpu().numpy())

                val_bar.set_postfix(loss=loss.item())

        avg_val_loss = total_val_loss / len(val_loader)

        # Probabilities above 0.5 count as a positive prediction
        all_preds = (np.vstack(all_preds) > 0.5).astype(int)
        all_labels = np.vstack(all_labels)

        macro_f1 = f1_score(all_labels, all_preds, average="macro")
        micro_f1 = f1_score(all_labels, all_preds, average="micro")

        print("Val Loss:", round(avg_val_loss, 4))
        print("Val Macro F1:", round(macro_f1, 4))
        print("Val Micro F1:", round(micro_f1, 4))

        print("\nPer-class F1:")
        print(classification_report(all_labels, all_preds, target_names=label_names, zero_division=0))

        history.append({
            "epoch": epoch + 1,
            "train_loss": avg_train_loss,
            "val_loss": avg_val_loss,
            "macro_f1": macro_f1,
            "micro_f1": micro_f1,
        })

    return history

Run Training:  

In [None]:
def move_to_device(batch, device):
    """Return a new dict holding the three model tensors of `batch` on `device`.

    Only ``input_ids``, ``attention_mask`` and ``labels`` are copied over; any
    other key present in `batch` is left out of the result.
    """
    wanted = ("input_ids", "attention_mask", "labels")
    return {key: batch[key].to(device) for key in wanted}


In [None]:
from torch.utils.data import Dataset, DataLoader

class ToxicDataset(Dataset):
    """Dataset over parallel sequences of texts and label vectors.

    Unlike the earlier class of the same name, this one takes the texts and
    the labels as two separate indexable sequences rather than a DataFrame.

    Args:
        texts: indexable sequence of comments (each is passed through ``str``).
        labels: indexable sequence of per-sample label vectors.
        tokenizer: callable tokenizer returning ``input_ids`` and
            ``attention_mask`` tensors.
        max_len: fixed token length used for padding and truncation.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        sample_text = str(self.texts[idx])
        target = torch.tensor(self.labels[idx], dtype=torch.float)

        encoding = self.tokenizer(
            sample_text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )

        # Remove the batch axis added by return_tensors="pt"
        item = {
            name: encoding[name].squeeze(0)
            for name in ("input_ids", "attention_mask")
        }
        item["labels"] = target
        return item

label_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# NOTE(review): this cell always trains on full_train_df (original + augmented)
# and does not look at TRAIN_MODE / training_df - confirm that is intended.
train_dataset = ToxicDataset(
    texts=full_train_df["comment_text"].values,
    labels=full_train_df[label_cols].values,
    tokenizer=tokenizer,
)

val_dataset = ToxicDataset(
    texts=val_df["comment_text"].values,
    labels=val_df[label_cols].values,
    tokenizer=tokenizer,
)

# Only the training loader shuffles; validation keeps the frame order
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=16,
)


In [None]:
from transformers import AutoModelForSequenceClassification

# DistilBERT with a six-output head configured for multi-label classification
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    problem_type="multi_label_classification",
    num_labels=6,
)

# Last expression of the cell: moves the model and displays it
model.to(device)


In [None]:
import torch.nn as nn
from torch.optim import AdamW

#criterion = nn.BCEWithLogitsLoss()
# The class-weight tensor built in the "Class-Weighted Version" cell is named
# `pos_weights`; no variable called `weights` exists in this notebook.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(device))
optimizer = AdamW(model.parameters(), lr=1e-5)


In [None]:
# Fine-tune for two epochs; train_model prints the losses and F1 scores of each epoch
train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=2)


Model Reinitializing:

In [None]:
# Start again from the pretrained checkpoint, discarding any fine-tuned weights
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=6,
    problem_type="multi_label_classification",
)
model = model.to(device)
