In [None]:
!pip install -q transformers==4.39.3 tokenizers==0.15.2 accelerate scikit-learn gradio

import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np

# Pick the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device  # echo the selected device as the cell output


[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m134.8/134.8 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m8.8/8.8 MB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.6/3.6 MB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 5.2.0 requires transformers<6.0.0,>=4.41.0, but you have transformers 4.39.3 which is incompatible.[0m[31m
[0m

device(type='cuda')

In [None]:
# Read the training CSV with the tolerant python engine (malformed rows are
# skipped, not raised), then keep only the text column and the six label columns.
wanted_columns = [
    'comment_text',
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]
df = pd.read_csv("/content/train.csv", engine="python", on_bad_lines="skip")[wanted_columns]
df.shape


(159571, 7)

In [None]:
# Work on a fixed-seed random subset of 74,980 rows with a fresh 0..n-1 index.
df_small = df.sample(n=74980, random_state=42).reset_index(drop=True)

# Every column after the first (the comment text) is treated as a label.
label_cols = list(df_small.columns[1:])

# Hold out 10% of the subset for validation; the seed keeps the split repeatable.
train_df, val_df = train_test_split(df_small, random_state=42, test_size=0.1)


In [None]:
# Pretrained fast tokenizer for the "distilbert-base-uncased" checkpoint (the
# same name the model is loaded from). ToxicDataset.__getitem__ and predict_raw
# both read this notebook-level name.
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

class ToxicDataset(Dataset):
    """Map-style dataset yielding tokenized comments with multi-label targets.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (the tokenizer
    output with its leading batch dimension squeezed out) and ``labels`` (a
    float32 tensor with one entry per label column).

    Args:
        df: DataFrame with a ``comment_text`` column plus the label columns.
        max_len: Length each comment is truncated/padded to by the tokenizer.
        tokenizer_fn: Optional tokenizer callable. When omitted, the
            notebook-level ``tokenizer`` is looked up at item-access time,
            which is the original behaviour.
        label_columns: Optional list of label column names. When omitted, the
            notebook-level ``label_cols`` is used, which is the original
            behaviour.
    """

    def __init__(self, df, max_len=96, tokenizer_fn=None, label_columns=None):
        # Explicit arguments remove the hidden dependency on notebook globals;
        # the fallbacks keep ``ToxicDataset(df)`` working exactly as before.
        columns = label_cols if label_columns is None else label_columns
        self.texts = df['comment_text'].tolist()
        self.labels = df[columns].values.astype('float32')
        self.max_len = max_len
        self.tokenizer_fn = tokenizer_fn

    def __len__(self):
        """Return the number of comments in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize comment ``idx`` and return it together with its labels."""
        # Resolve the global lazily so rebinding ``tokenizer`` after the dataset
        # was built is still honoured, as in the original implementation.
        tok = tokenizer if self.tokenizer_fn is None else self.tokenizer_fn
        enc = tok(
            # str() guards against non-string cells (e.g. missing values).
            str(self.texts[idx]),
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            # The tokenizer returns (1, max_len) tensors; drop the batch axis so
            # the DataLoader can stack items itself.
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx])
        }

# Wrap both splits in the dataset class (default max_len).
train_ds, val_ds = ToxicDataset(train_df), ToxicDataset(val_df)

# Training batches are reshuffled every epoch; validation keeps a fixed order
# and uses a larger batch size.
train_dl = DataLoader(train_ds, shuffle=True, batch_size=16)
val_dl = DataLoader(val_ds, shuffle=False, batch_size=32)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [None]:
# Per-label class balance on the training split: positives are the column sums,
# negatives are the remaining rows.
n_train_rows = len(train_df)
pos_counts = train_df[label_cols].sum().values
neg_counts = n_train_rows - pos_counts

# Negative-to-positive ratio per label, bounded to [1, 5] so no label's
# positive weight falls below 1 or exceeds 5.
ratio = neg_counts / pos_counts
ratio_clipped = ratio.clip(1.0, 5.0)

# Tensor form for BCEWithLogitsLoss, created directly on the training device.
pos_weight = torch.tensor(ratio_clipped, dtype=torch.float32, device=device)
print("pos_weight:", dict(zip(label_cols, ratio_clipped)))


pos_weight: {'toxic': np.float64(5.0), 'severe_toxic': np.float64(5.0), 'obscene': np.float64(5.0), 'threat': np.float64(5.0), 'insult': np.float64(5.0), 'identity_hate': np.float64(5.0)}


In [None]:
# DistilBERT with a classification head sized to one logit per label column.
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=len(label_cols)
)
model = model.to(device)

# Multi-label objective: an independent sigmoid/BCE term per label, with
# positives up-weighted by pos_weight.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# AdamW over all model parameters.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=2e-5)




model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
from tqdm.auto import tqdm

def train_epoch():
    """Run one optimisation pass over ``train_dl``.

    Puts the notebook-level ``model`` in training mode, takes one optimizer
    step per batch, and returns the mean of the per-batch loss values.
    """
    model.train()
    running_loss = 0
    for step_batch in train_dl:
        # Move the three tensors of the batch onto the training device.
        input_ids, attn_mask, targets = (
            step_batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'labels')
        )

        # Clear gradients left over from the previous step before the forward pass.
        optimizer.zero_grad()
        logits = model(input_ids, attention_mask=attn_mask).logits
        batch_loss = criterion(logits, targets)

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    return running_loss / len(train_dl)

def val_epoch():
    """Evaluate the notebook-level ``model`` on ``val_dl`` without gradients.

    Returns:
        A 3-tuple ``(mean_loss, labels, probabilities)``: the mean of the
        per-batch loss values, the concatenated ground-truth labels, and the
        concatenated sigmoid probabilities, the latter two as CPU tensors.
    """
    model.eval()
    with torch.no_grad():
        loss_sum = 0
        collected_true = []
        collected_probs = []
        for val_batch in val_dl:
            input_ids = val_batch['input_ids'].to(device)
            attn_mask = val_batch['attention_mask'].to(device)
            targets = val_batch['labels'].to(device)

            logits = model(input_ids, attention_mask=attn_mask).logits
            loss_sum += criterion(logits, targets).item()
            # Keep per-batch results on the CPU so they can be concatenated
            # after the loop.
            collected_probs.append(torch.sigmoid(logits).cpu())
            collected_true.append(targets.cpu())

        mean_loss = loss_sum / len(val_dl)
        return mean_loss, torch.cat(collected_true), torch.cat(collected_probs)


In [None]:
# Train for three epochs, reporting train and validation loss after each one.
# val_loss / val_labels / val_probs are overwritten every epoch, so the values
# left in the namespace afterwards come from the final epoch only.
# NOTE(review): the recorded output shows validation loss rising in epoch 3
# (0.0985 -> 0.1305) while train loss keeps falling — consider keeping the
# best-epoch weights; confirm before changing.
for e in range(3):
    print(f"\nüî• Epoch {e+1}/3")
    tr_loss = train_epoch()
    val_loss, val_labels, val_probs = val_epoch()
    print(f"Train Loss: {tr_loss:.4f} | Val Loss: {val_loss:.4f}")



üî• Epoch 1/3
Train Loss: 0.1320 | Val Loss: 0.1019

üî• Epoch 2/3
Train Loss: 0.0868 | Val Loss: 0.0985

üî• Epoch 3/3
Train Loss: 0.0647 | Val Loss: 0.1305


In [None]:
# Pick, per label, the decision threshold that maximises F1 on the validation
# predictions. Candidates are 17 evenly spaced values from 0.1 to 0.9.
threshold_grid = np.linspace(0.1, 0.9, 17)

best_th = {}
for label_idx, label_name in enumerate(label_cols):
    truth = val_labels[:, label_idx].numpy()
    scores = val_probs[:, label_idx].numpy()

    f1_per_threshold = [
        f1_score(truth, (scores >= cut).astype(int), zero_division=0)
        for cut in threshold_grid
    ]

    # argmax returns the first maximum, i.e. the lowest threshold among ties.
    # If no candidate achieves a positive F1, fall back to 0.5.
    top = int(np.argmax(f1_per_threshold))
    best_th[label_name] = threshold_grid[top] if f1_per_threshold[top] > 0 else 0.5
best_th


{'toxic': np.float64(0.7000000000000001),
 'severe_toxic': np.float64(0.65),
 'obscene': np.float64(0.8),
 'threat': np.float64(0.2),
 'insult': np.float64(0.8),
 'identity_hate': np.float64(0.75)}

In [None]:
# Tie-break order used by choose_main_label: when two active labels have exactly
# the same margin over their thresholds, the one listed earlier here wins.
PRIORITY = ["threat", "identity_hate", "obscene", "insult", "toxic","severe_toxic"]

def predict_raw(text):
    """Score one piece of text against every label.

    Returns a dict keyed by label name. Each value holds the sigmoid
    probability (``prob``), the tuned threshold from ``best_th``
    (``threshold``), their difference (``over``), and whether the probability
    reaches the threshold (``active``).
    """
    model.eval()

    # Tokenize with the same truncation/padding length the dataset uses by default.
    encoded = tokenizer(text, truncation=True, padding="max_length",
                        max_length=96, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**inputs).logits
    # Single input, so take row 0 of the (1, num_labels) probability matrix.
    probs = torch.sigmoid(logits).cpu().numpy()[0]

    report = {}
    for pos, label in enumerate(label_cols):
        prob = float(probs[pos])
        cutoff = float(best_th[label])
        report[label] = {
            "prob": prob,
            "threshold": cutoff,
            "over": prob - cutoff,
            "active": prob >= cutoff
        }
    return report

def choose_main_label(raw):
    """Reduce per-label results to a single headline label.

    Among labels flagged ``active``, returns the one with the largest ``over``
    margin; exact ties are broken in favour of the label appearing earlier in
    ``PRIORITY``. Returns ``"Non-Toxic"`` when no label is active.

    Raises:
        ValueError: if an active label is not listed in ``PRIORITY``.

    NOTE(review): margins are floats, so exact ties should be rare and
    ``PRIORITY`` will seldom decide the outcome — confirm this is intended.
    """
    candidates = [name for name in raw if raw[name]["active"]]
    if candidates:
        # Negated index so that an earlier PRIORITY position compares as larger.
        return max(
            candidates,
            key=lambda name: (raw[name]["over"], -PRIORITY.index(name)),
        )
    return "Non-Toxic"

def predict_final(text):
    """Classify ``text`` and return the headline label with supporting detail.

    The result has three keys: ``classification`` (the label chosen by
    ``choose_main_label``), ``details`` (a 0/1 flag per label), and ``raw``
    (the full output of ``predict_raw``).
    """
    raw = predict_raw(text)

    # Collapse each label's boolean "active" flag to 0/1.
    details = {}
    for lbl in raw:
        details[lbl] = int(raw[lbl]["active"])

    return {
        "classification": choose_main_label(raw),
        "details": details,
        "raw": raw,
    }


In [None]:
# Spot-check the pipeline on a handful of hand-written examples.
sample_inputs = [
    ("T1:", "You are an amazing person, stay blessed!"),
    ("T2:", "You are stupid and useless"),
    ("T3:", "Fuck off idiot"),
    ("T4:", "I will kill you tomorrow"),
    ("T5:", "you are a black idiot"),
]
for tag, sample_text in sample_inputs:
    print(tag, predict_final(sample_text))


T1: {'classification': 'Non-Toxic', 'details': {'toxic': 0, 'severe_toxic': 0, 'obscene': 0, 'threat': 0, 'insult': 0, 'identity_hate': 0}, 'raw': {'toxic': {'prob': 0.001799207180738449, 'threshold': 0.7000000000000001, 'over': -0.6982007928192616, 'active': False}, 'severe_toxic': {'prob': 1.216263171954779e-05, 'threshold': 0.65, 'over': -0.6499878373682805, 'active': False}, 'obscene': {'prob': 0.00025184961850754917, 'threshold': 0.8, 'over': -0.7997481503814925, 'active': False}, 'threat': {'prob': 3.9877188100945204e-05, 'threshold': 0.2, 'over': -0.19996012281189907, 'active': False}, 'insult': {'prob': 0.0005970383062958717, 'threshold': 0.8, 'over': -0.7994029616937042, 'active': False}, 'identity_hate': {'prob': 5.650935781886801e-05, 'threshold': 0.75, 'over': -0.7499434906421811, 'active': False}}}
T2: {'classification': 'toxic', 'details': {'toxic': 1, 'severe_toxic': 0, 'obscene': 1, 'threat': 0, 'insult': 1, 'identity_hate': 0}, 'raw': {'toxic': {'prob': 0.9932142496109