In [14]:
import pandas as pd
import numpy as np
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments, pipeline
from sklearn.metrics import classification_report, roc_auc_score
import torch
from sklearn.model_selection import train_test_split

#Download and Prepare the Dataset

In [4]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Toxic Comment Classification/train.csv')
df.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [5]:
df_small = df.sample(n=5000, random_state=42)

In [6]:
#Extract labels and texts
labels = df_small[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].values
texts = df_small['comment_text'].tolist()

#Split data (80% train, 20% validation)
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

#Initialize DistilBERT tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

#Tokenize texts
train_encodings = tokenizer(
    train_texts,
    truncation=True,
    padding=True,
    max_length=128
)
val_encodings = tokenizer(
    val_texts,
    truncation=True,
    padding=True,
    max_length=128
)

#Create PyTorch dataset
class ToxicCommentDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = ToxicCommentDataset(train_encodings, train_labels)
val_dataset = ToxicCommentDataset(val_encodings, val_labels)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [7]:
# Initialize DistilBERT for multi-label classification (6 labels)
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=6,
    problem_type="multi_label_classification"
)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"
)

In [9]:
def compute_metrics(pred):
    logits, labels = pred
    probs = 1 / (1 + np.exp(-logits))  # Sigmoid for probabilities
    aucs = []
    for i in range(labels.shape[1]):
        try:
            auc = roc_auc_score(labels[:, i], probs[:, i])
        except:
            auc = np.nan
        aucs.append(auc)
    return {
        'roc_auc': np.nanmean(aucs)
    }

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

In [11]:
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mpothuriavaneesh[0m ([33mpothuriavaneesh-bits-pilani[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Roc Auc
1,0.065,0.053742,0.948631


TrainOutput(global_step=1000, training_loss=0.08597797870635987, metrics={'train_runtime': 93.3494, 'train_samples_per_second': 42.85, 'train_steps_per_second': 10.712, 'total_flos': 132476848128000.0, 'train_loss': 0.08597797870635987, 'epoch': 1.0})

In [12]:
eval_results = trainer.evaluate()
print(eval_results)

{'eval_loss': 0.0537419393658638, 'eval_roc_auc': 0.9486309579842382, 'eval_runtime': 3.5518, 'eval_samples_per_second': 281.547, 'eval_steps_per_second': 35.193, 'epoch': 1.0}


In [13]:
model.save_pretrained('./toxic_comment_classifier')
tokenizer.save_pretrained('./toxic_comment_classifier')

('./toxic_comment_classifier/tokenizer_config.json',
 './toxic_comment_classifier/special_tokens_map.json',
 './toxic_comment_classifier/vocab.txt',
 './toxic_comment_classifier/added_tokens.json',
 './toxic_comment_classifier/tokenizer.json')

In [17]:
classifier = pipeline(
    "text-classification",
    model='./toxic_comment_classifier',
    tokenizer='./toxic_comment_classifier',
    function_to_apply='sigmoid',  # For multi-label
    top_k=6  # Show all 6 labels
)

# Test with a sample comment
sample_comment = "I hate you so much, you worthless piece of trash!"
results = classifier(sample_comment)

label_names = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
result = results[0]
for i, label in enumerate(label_names):
    print(f"{label}: {result[i]['score']:.2f}")

Device set to use cuda:0


toxic: 0.95
severe_toxic: 0.79
obscene: 0.73
threat: 0.10
insult: 0.05
identity_hate: 0.02
