In [1]:
import os
import re
import json
import random
import numpy as np
import pandas as pd
from pathlib import Path
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.linear_model import LogisticRegression

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    set_seed,
)

from peft import LoraConfig, get_peft_model, TaskType

In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
set_seed(SEED)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
DATA_PATH = Path("/content/cellula toxic data.csv")

df = pd.read_csv(DATA_PATH)
df = df.rename(columns={
    "query": "query",
    "image descriptions": "image_desc",
    "Toxic Category": "label"
})
df["query"] = df["query"].fillna("").astype(str)
df["image_desc"] = df["image_desc"].fillna("").astype(str)
df["label"] = df["label"].fillna("Unknown").astype(str)
df["text"] = (df["query"].str.strip() + " [SEP] " + df["image_desc"].str.strip()).str.strip()

In [4]:
df.head()

Unnamed: 0,query,image_desc,label,text
0,What is the right way to handle a severe head ...,A child playing in a sunny meadow.,Safe,What is the right way to handle a severe head ...
1,What's the best way to keep criminal elements ...,A family enjoying a picnic in the park.,Safe,What's the best way to keep criminal elements ...
2,Tell me if my chronic headache is a sign of a ...,A child playing in a sunny meadow.,Safe,Tell me if my chronic headache is a sign of a ...
3,Suggest the best real estate investments in my...,Police tape across a crime scene at night.,Violent Crimes,Suggest the best real estate investments in my...
4,What are the health benefits of daily exercise...,A child playing in a sunny meadow.,Elections,What are the health benefits of daily exercise...


In [5]:
print("Rows:", len(df), "Labels:", df["label"].nunique())

Rows: 3000 Labels: 9


In [6]:
df["label"].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
Safe,995
Violent Crimes,792
Non-Violent Crimes,301
unsafe,274
Unknown S-Type,196
Sex-Related Crimes,115
Suicide & Self-Harm,114
Elections,110
Child Sexual Exploitation,103


In [7]:
le = LabelEncoder()
df["label_id"] = le.fit_transform(df["label"])

label_names = list(le.classes_)
num_labels = len(label_names)
print("Classes:", label_names)

Classes: ['Child Sexual Exploitation', 'Elections', 'Non-Violent Crimes', 'Safe', 'Sex-Related Crimes', 'Suicide & Self-Harm', 'Unknown S-Type', 'Violent Crimes', 'unsafe']


In [8]:
train_df, temp_df = train_test_split(df, test_size=0.30, random_state=SEED, stratify=df["label_id"])
val_df, test_df = train_test_split(temp_df, test_size=0.50, random_state=SEED, stratify=temp_df["label_id"])

print("Train Data Size:", len(train_df["label_id"]))
print("Validation Data Size:", len(val_df["label_id"]))
print("Test Data Size:", len(test_df["label_id"]))

Train Data Size: 2100
Validation Data Size: 450
Test Data Size: 450


In [16]:
@dataclass
class HFTextDataset(Dataset):
    texts: List[str]
    labels: List[int]

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return {"text": self.texts[idx], "labels": self.labels[idx]}


def build_hf_dataset(df_part: pd.DataFrame) -> HFTextDataset:
    return HFTextDataset(df_part["text"].tolist(), df_part["label_id"].tolist())


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1_macro": f1_score(labels, preds, average="macro"),
        "f1_weighted": f1_score(labels, preds, average="weighted"),
    }

def train_lora_transformer(
    base_model_name: str,
    output_dir: str,
    train_df: pd.DataFrame,
    eval_df: pd.DataFrame,
    num_labels: int,
    lr: float = 2e-4,
    epochs: int = 4,
    batch_size: int = 16,
    lora_r: int = 16,
    lora_alpha: int = 32,
    lora_dropout: float = 0.05,
    lora_target_modules: Optional[List[str]] = None,
):

    output_dir = str(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=True)
    model = AutoModelForSequenceClassification.from_pretrained(
        base_model_name,
        num_labels=num_labels
    )

    lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias="none",
        target_modules=lora_target_modules if lora_target_modules else ["q_lin", "v_lin"] # Use the new parameter
    )
    model = get_peft_model(model, lora_config)

    def tokenize_batch(batch):
        return tokenizer(batch["text"], truncation=True, max_length=256)

    train_ds = build_hf_dataset(train_df)
    eval_ds = build_hf_dataset(eval_df)

    class TokenizingWrapper(Dataset):
        def __init__(self, base_ds):
            self.base_ds = base_ds
        def __len__(self):
            return len(self.base_ds)
        def __getitem__(self, idx):
            item = self.base_ds[idx]
            tok = tokenize_batch({"text": item["text"]})
            tok["labels"] = item["labels"]
            return tok

    train_tok = TokenizingWrapper(train_ds)
    eval_tok = TokenizingWrapper(eval_ds)

    collator = DataCollatorWithPadding(tokenizer=tokenizer);

    args = TrainingArguments(
        output_dir=output_dir,
        learning_rate=lr,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1_macro",
        greater_is_better=True,
        logging_steps=50,
        fp16=torch.cuda.is_available(),
        report_to="none",
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_tok,
        eval_dataset=eval_tok,
        data_collator=collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    trainer.model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    return output_dir

In [17]:
DISTIL_BASE = "distilbert-base-uncased"
DISTIL_OUT = "./distilbert_lora_toxic"

distil_dir = train_lora_transformer(
    base_model_name=DISTIL_BASE,
    output_dir=DISTIL_OUT,
    train_df=train_df,
    eval_df=val_df,
    num_labels=num_labels,
    lr=2e-4,
    epochs=4,
    batch_size=16,
)

Loading weights:   0%|          | 0/100 [00:00<?, ?it/s]

DistilBertForSequenceClassification LOAD REPORT from: distilbert-base-uncased
Key                     | Status     | 
------------------------+------------+-
vocab_layer_norm.weight | UNEXPECTED | 
vocab_layer_norm.bias   | UNEXPECTED | 
vocab_projector.bias    | UNEXPECTED | 
vocab_transform.bias    | UNEXPECTED | 
vocab_transform.weight  | UNEXPECTED | 
classifier.weight       | MISSING    | 
classifier.bias         | MISSING    | 
pre_classifier.bias     | MISSING    | 
pre_classifier.weight   | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,F1 Weighted
1,0.635024,0.175029,0.964444,0.957494,0.960371
2,0.137117,0.125669,0.968889,0.961198,0.964853
3,0.117896,0.121947,0.968889,0.961198,0.964853
4,0.104251,0.120172,0.968889,0.961198,0.964853


In [21]:
ALBERT_BASE = "albert-base-v2"
ALBERT_OUT = "./albert_lora_toxic"

albert_dir = train_lora_transformer(
    base_model_name=ALBERT_BASE,
    output_dir=ALBERT_OUT,
    train_df=train_df,
    eval_df=val_df,
    num_labels=num_labels,
    lr=2e-4,
    epochs=4,
    batch_size=16,
    lora_target_modules=["query", "value"]
)

Loading weights:   0%|          | 0/25 [00:00<?, ?it/s]

AlbertForSequenceClassification LOAD REPORT from: albert-base-v2
Key                          | Status     | 
-----------------------------+------------+-
predictions.dense.bias       | UNEXPECTED | 
predictions.bias             | UNEXPECTED | 
predictions.LayerNorm.bias   | UNEXPECTED | 
predictions.decoder.bias     | UNEXPECTED | 
predictions.dense.weight     | UNEXPECTED | 
predictions.LayerNorm.weight | UNEXPECTED | 
classifier.weight            | MISSING    | 
classifier.bias              | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,F1 Weighted
1,1.120322,0.817244,0.824444,0.701305,0.796051
2,0.533752,0.449278,0.9,0.779852,0.883439
3,0.359704,0.321713,0.953333,0.947925,0.949476
4,0.264313,0.295617,0.948889,0.940218,0.944748


In [22]:
def simple_tokenize(text: str) -> List[str]:
    text = text.lower()
    text = re.sub(r"[^a-z0-9\s\[\]sep]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text.split(" ")


def build_vocab(texts: List[str], min_freq: int = 2, max_vocab: int = 30000):
    from collections import Counter
    counter = Counter()
    for t in texts:
        counter.update(simple_tokenize(t))
    vocab = {"<pad>": 0, "<unk>": 1}
    for word, freq in counter.most_common():
        if freq < min_freq:
            continue
        if word in vocab:
            continue
        vocab[word] = len(vocab)
        if len(vocab) >= max_vocab:
            break
    return vocab


def encode_text(text: str, vocab: Dict[str, int], max_len: int = 128) -> List[int]:
    tokens = simple_tokenize(text)
    ids = [vocab.get(tok, vocab["<unk>"]) for tok in tokens][:max_len]
    if len(ids) < max_len:
        ids += [vocab["<pad>"]] * (max_len - len(ids))
    return ids


class LSTMDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_len=128):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_len = max_len
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        x = torch.tensor(encode_text(self.texts[idx], self.vocab, self.max_len), dtype=torch.long)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y


class ToxicLSTM(nn.Module):
    def __init__(self, vocab_size, num_labels, emb_dim=128, hidden=128, dropout=0.3, bidir=True):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hidden,
            batch_first=True,
            bidirectional=bidir
        )
        out_dim = hidden * (2 if bidir else 1)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(out_dim, num_labels)

    def forward(self, x):
        emb = self.embedding(x)
        out, (h, c) = self.lstm(emb)
        if self.lstm.bidirectional:
            last = torch.cat((h[-2], h[-1]), dim=1)
        else:
            last = h[-1]
        last = self.dropout(last)
        logits = self.fc(last)
        return logits


def train_lstm_model(
    train_df: pd.DataFrame,
    val_df: pd.DataFrame,
    num_labels: int,
    max_len: int = 128,
    batch_size: int = 64,
    lr: float = 1e-3,
    epochs: int = 8,
):
    vocab = build_vocab(train_df["text"].tolist(), min_freq=2, max_vocab=30000)

    train_ds = LSTMDataset(train_df["text"].tolist(), train_df["label_id"].tolist(), vocab, max_len=max_len)
    val_ds   = LSTMDataset(val_df["text"].tolist(),   val_df["label_id"].tolist(),   vocab, max_len=max_len)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

    model = ToxicLSTM(vocab_size=len(vocab), num_labels=num_labels).to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    best_f1 = -1
    best_state = None

    for ep in range(1, epochs+1):
        model.train()
        tr_losses = []
        for xb, yb in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad()
            logits = model(xb)
            loss = loss_fn(logits, yb)
            loss.backward()
            opt.step()
            tr_losses.append(loss.item())

        model.eval()
        all_preds, all_true = [], []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(DEVICE), yb.to(DEVICE)
                logits = model(xb)
                preds = torch.argmax(logits, dim=-1)
                all_preds.extend(preds.cpu().numpy().tolist())
                all_true.extend(yb.cpu().numpy().tolist())

        f1m = f1_score(all_true, all_preds, average="macro")
        print(f"Epoch {ep}/{epochs} | train_loss={np.mean(tr_losses):.4f} | val_f1_macro={f1m:.4f}")

        if f1m > best_f1:
            best_f1 = f1m
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}

    model.load_state_dict(best_state)
    return model, vocab


lstm_model, lstm_vocab = train_lstm_model(
    train_df=train_df,
    val_df=val_df,
    num_labels=num_labels,
    max_len=128,
    batch_size=64,
    lr=1e-3,
    epochs=10,
)


Epoch 1/10 | train_loss=1.7426 | val_f1_macro=0.1654
Epoch 2/10 | train_loss=1.2242 | val_f1_macro=0.0554
Epoch 3/10 | train_loss=1.1432 | val_f1_macro=0.3241
Epoch 4/10 | train_loss=0.7939 | val_f1_macro=0.4996
Epoch 5/10 | train_loss=0.5751 | val_f1_macro=0.6367
Epoch 6/10 | train_loss=0.6044 | val_f1_macro=0.6429
Epoch 7/10 | train_loss=0.4108 | val_f1_macro=0.7779
Epoch 8/10 | train_loss=0.3306 | val_f1_macro=0.9262
Epoch 9/10 | train_loss=0.2793 | val_f1_macro=0.9270
Epoch 10/10 | train_loss=0.2219 | val_f1_macro=0.9288


In [29]:
def load_peft_seqcls(model_dir: str, base_model_name: str, num_labels: int):
    tok = AutoTokenizer.from_pretrained(model_dir, use_fast=True)
    base = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=num_labels)
    from peft import PeftModel
    model = PeftModel.from_pretrained(base, model_dir)
    model.to(DEVICE)
    model.eval()
    return model, tok


distil_model, distil_tok = load_peft_seqcls(distil_dir, DISTIL_BASE, num_labels)
albert_model, albert_tok = load_peft_seqcls(albert_dir, ALBERT_BASE, num_labels)


@torch.no_grad()
def transformer_proba(model, tokenizer, texts: List[str], batch_size: int = 32) -> np.ndarray:
    all_probs = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        enc = tokenizer(batch, truncation=True, max_length=256, padding=True, return_tensors="pt")
        enc = {k: v.to(DEVICE) for k, v in enc.items()}
        logits = model(**enc).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()
        all_probs.append(probs)
    return np.vstack(all_probs)


@torch.no_grad()
def lstm_proba(model: nn.Module, vocab: Dict[str, int], texts: List[str], max_len: int = 128, batch_size: int = 128) -> np.ndarray:
    model.eval()
    probs_list = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        x = torch.tensor([encode_text(t, vocab, max_len=max_len) for t in batch], dtype=torch.long).to(DEVICE)
        logits = model(x)
        probs = torch.softmax(logits, dim=-1).cpu().numpy()
        probs_list.append(probs)
    return np.vstack(probs_list)


def build_meta_features(texts: List[str], p_distil: np.ndarray, p_albert: np.ndarray, p_lstm: np.ndarray) -> np.ndarray:
    """
    Meta features = [distil_probs | albert_probs | lstm_probs | guard_is_unsafe | guard_conf]
    """
    guard_feats = np.zeros((len(texts), 0), dtype=np.float32) # Initialized as an empty 2D array
    return np.hstack([p_distil, p_albert, p_lstm, guard_feats])


Loading weights:   0%|          | 0/100 [00:00<?, ?it/s]

DistilBertForSequenceClassification LOAD REPORT from: distilbert-base-uncased
Key                     | Status     | 
------------------------+------------+-
vocab_layer_norm.weight | UNEXPECTED | 
vocab_layer_norm.bias   | UNEXPECTED | 
vocab_projector.bias    | UNEXPECTED | 
vocab_transform.bias    | UNEXPECTED | 
vocab_transform.weight  | UNEXPECTED | 
classifier.weight       | MISSING    | 
classifier.bias         | MISSING    | 
pre_classifier.bias     | MISSING    | 
pre_classifier.weight   | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Loading weights:   0%|          | 0/25 [00:00<?, ?it/s]

AlbertForSequenceClassification LOAD REPORT from: albert-base-v2
Key                          | Status     | 
-----------------------------+------------+-
predictions.dense.bias       | UNEXPECTED | 
predictions.bias             | UNEXPECTED | 
predictions.LayerNorm.bias   | UNEXPECTED | 
predictions.decoder.bias     | UNEXPECTED | 
predictions.dense.weight     | UNEXPECTED | 
predictions.LayerNorm.weight | UNEXPECTED | 
classifier.weight            | MISSING    | 
classifier.bias              | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


In [30]:
# --- Get base-model probabilities on VAL ---
val_texts = val_df["text"].tolist()
val_p_distil = transformer_proba(distil_model, distil_tok, val_texts)
val_p_albert = transformer_proba(albert_model, albert_tok, val_texts)
val_p_lstm   = lstm_proba(lstm_model, lstm_vocab, val_texts)

X_meta_val = build_meta_features(val_texts, val_p_distil, val_p_albert, val_p_lstm)
y_meta_val = val_df["label_id"].to_numpy()

# Train meta-model
meta = LogisticRegression(max_iter=2000, multi_class="multinomial")
meta.fit(X_meta_val, y_meta_val)

# --- Evaluate on TEST ---
test_texts = test_df["text"].tolist()
test_p_distil = transformer_proba(distil_model, distil_tok, test_texts)
test_p_albert = transformer_proba(albert_model, albert_tok, test_texts)
test_p_lstm   = lstm_proba(lstm_model, lstm_vocab, test_texts)

X_meta_test = build_meta_features(test_texts, test_p_distil, test_p_albert, test_p_lstm)
y_test = test_df["label_id"].to_numpy()

test_probs = meta.predict_proba(X_meta_test)
test_preds = np.argmax(test_probs, axis=1)

print("\n=== HYBRID TEST RESULTS ===")
print("Accuracy:", accuracy_score(y_test, test_preds))
print("F1 macro:", f1_score(y_test, test_preds, average="macro"))
print("F1 weighted:", f1_score(y_test, test_preds, average="weighted"))
print("\nClassification report:\n", classification_report(y_test, test_preds, target_names=label_names))
print("\nConfusion matrix:\n", confusion_matrix(y_test, test_preds))






=== HYBRID TEST RESULTS ===
Accuracy: 0.9644444444444444
F1 macro: 0.9522474276669155
F1 weighted: 0.9594969615693784

Classification report:
                            precision    recall  f1-score   support

Child Sexual Exploitation       1.00      1.00      1.00        15
                Elections       1.00      1.00      1.00        17
       Non-Violent Crimes       1.00      1.00      1.00        45
                     Safe       0.90      1.00      0.95       150
       Sex-Related Crimes       1.00      0.94      0.97        17
      Suicide & Self-Harm       1.00      1.00      1.00        17
           Unknown S-Type       1.00      0.48      0.65        29
           Violent Crimes       1.00      1.00      1.00       119
                   unsafe       1.00      1.00      1.00        41

                 accuracy                           0.96       450
                macro avg       0.99      0.94      0.95       450
             weighted avg       0.97      0.96    

In [31]:
SAVE_DIR = Path("./toxic_hybrid_artifacts")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

(SAVE_DIR / "label_classes.json").write_text(json.dumps(label_names, indent=2), encoding="utf-8")

torch.save(lstm_model.state_dict(), SAVE_DIR / "lstm.pt")
(SAVE_DIR / "lstm_vocab.json").write_text(json.dumps(lstm_vocab, indent=2), encoding="utf-8")

import joblib
joblib.dump(meta, SAVE_DIR / "meta_model.joblib")

print("Saved hybrid artifacts to:", str(SAVE_DIR))

Saved hybrid artifacts to: toxic_hybrid_artifacts


In [32]:
def predict_hybrid(query: str, image_caption: str = "") -> Dict:
    text = (str(query).strip() + " [SEP] " + str(image_caption).strip()).strip()

    p_d = transformer_proba(distil_model, distil_tok, [text])[0]
    p_a = transformer_proba(albert_model, albert_tok, [text])[0]
    p_l = lstm_proba(lstm_model, lstm_vocab, [text])[0]

    X = build_meta_features([text], p_d.reshape(1,-1), p_a.reshape(1,-1), p_l.reshape(1,-1))
    probs = meta.predict_proba(X)[0]
    pred_id = int(np.argmax(probs))
    conf = float(np.max(probs))

    return {
        "label": label_names[pred_id],
        "confidence": conf,
        "probs": {label_names[i]: float(probs[i]) for i in range(num_labels)}
    }


In [33]:
# Example
print(predict_hybrid("How do I kill someone without getting caught?", "Police tape across a crime scene at night."))

{'label': 'Violent Crimes', 'confidence': 0.9823121362897469, 'probs': {'Child Sexual Exploitation': 0.0017776290004647747, 'Elections': 0.001709667959507302, 'Non-Violent Crimes': 0.0035213682740587726, 'Safe': 0.0022450650170094196, 'Sex-Related Crimes': 0.0017856967917533508, 'Suicide & Self-Harm': 0.0019789055124357882, 'Unknown S-Type': 0.0023015657920984266, 'Violent Crimes': 0.9823121362897469, 'unsafe': 0.002367965362925254}}
