# DeBERTa Cross-Encoder Pairwise (Margin-Ranking) Notebook - Training

* ###   Purpose: Train a DeBERTa-based cross-encoder that, given (rule + target_comment) and an example
* ###   (positive OR negative), produces a scalar score. We train with a margin ranking loss so that
  * ####   score(target, positive) > score(target, negative) + margin.
 
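* The notebook reads train.csv, converts each row into multiple (text_a, pos_example, neg_example)
  triples, trains the model, and saves the trained model/tokenizer into `saved_model/`, which can be
  reused in another notebook via AutoModelForSequenceClassification.from_pretrained('saved_model').
* Note: the cells below currently build pointwise (text_a, example, label) samples and fine-tune a
  2-label sequence classifier with the stock `Trainer`, i.e. without a custom margin-ranking loss;
  the pairwise objective described above is not implemented in the code shown here.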

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, IntervalStrategy
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import numpy as np

2025-10-16 17:12:14.581074: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760634734.790684      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760634734.853801      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
def load_data(train_path: str):
    """Read the training CSV and discard rows missing any required field.

    A row is kept only when ``rule``, ``body``, ``positive_example_1`` and
    ``negative_example_1`` are all non-null.

    Args:
        train_path: Location of the CSV, handed straight to ``pandas.read_csv``.

    Returns:
        The filtered ``DataFrame``; surviving rows keep their original index
        labels.
    """
    required_columns = ["positive_example_1", "negative_example_1", "body", "rule"]
    frame = pd.read_csv(train_path)
    return frame.dropna(subset=required_columns)

In [3]:
class RuleViolationPairs(Dataset):
    """Pointwise cross-encoder samples built from rule/comment rows.

    Every row contributes one sample per usable example string. The first
    text segment is the formatted subreddit/rule/target line, the second is
    the example itself. Samples taken from the ``positive_example_*`` columns
    are labelled 1 and those from ``negative_example_*`` are labelled 0.

    NOTE(review): the label depends only on which column the example came
    from, not on any per-row label of the target comment -- confirm this is
    the intended training signal.

    Args:
        df: DataFrame with ``rule`` and ``body`` columns, plus optional
            ``subreddit`` and ``positive_example_1/2`` /
            ``negative_example_1/2`` columns.
        tokenizer: Callable used as
            ``tokenizer(text_a, text_b, truncation=..., padding=...,
            max_length=..., return_tensors="pt")``; its result must expose
            ``.items()`` yielding tensors with a leading batch dimension.
        max_len: Length every encoded pair is padded/truncated to.
    """

    # (column names, label) groups, in the order samples are emitted per row.
    _POSITIVE_COLUMNS = ("positive_example_1", "positive_example_2")
    _NEGATIVE_COLUMNS = ("negative_example_1", "negative_example_2")

    def __init__(self, df, tokenizer, max_len=256):
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.samples = []

        labelled_groups = (
            (self._POSITIVE_COLUMNS, 1),
            (self._NEGATIVE_COLUMNS, 0),
        )
        for _, row in df.iterrows():
            rule = row["rule"]
            target = row["body"]
            subreddit = str(row.get("subreddit", "")).strip()
            text_a = f"Subreddit: {subreddit} || Rule: {rule} || Target: {target}"

            for columns, label in labelled_groups:
                for column in columns:
                    example = row.get(column)
                    # Skip missing values (None/NaN) and blank strings.
                    if isinstance(example, str) and example.strip():
                        self.samples.append((text_a, example, label))

    def __len__(self):
        """Return the number of (text_a, example, label) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its tensors plus a ``labels`` entry."""
        text_a, text_b, label = self.samples[idx]
        encoded = self.tokenizer(
            text_a,
            text_b,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        # Drop the batch dimension added by return_tensors="pt".
        item = {}
        for key, tensor in encoded.items():
            item[key] = tensor.squeeze(0)
        item["labels"] = torch.tensor(label, dtype=torch.long)
        return item


In [4]:
def train_model(train_df, model_name="microsoft/deberta-v3-base", output_dir="saved_model"):
    """Fine-tune a 2-label sequence classifier on example pairs and save it.

    The rows are split 90/10 (fixed seed 42) into training and validation
    frames, each is expanded into ``RuleViolationPairs`` samples, and a
    ``Trainer`` runs for two epochs with evaluation and checkpointing at the
    end of every epoch. The checkpoint with the lowest ``eval_loss`` is
    reloaded at the end, then model and tokenizer are written to
    ``output_dir``.

    Args:
        train_df: DataFrame of training rows accepted by ``RuleViolationPairs``.
        model_name: Pretrained checkpoint name or path for tokenizer and model.
        output_dir: Directory for checkpoints and the final model/tokenizer.

    Returns:
        ``(model, tokenizer)`` -- the fine-tuned model and its tokenizer.
    """

    def compute_metrics(eval_pred):
        # ROC AUC of the class-1 softmax probability against the gold labels.
        raw_logits, gold = eval_pred
        class_probs = torch.softmax(torch.tensor(raw_logits), dim=-1)
        positive_scores = class_probs[:, 1].numpy()
        return {"roc_auc": roc_auc_score(gold, positive_scores)}

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Row-level split, so all samples derived from one row stay on one side.
    fit_rows, holdout_rows = train_test_split(train_df, test_size=0.1, random_state=42)
    train_ds = RuleViolationPairs(fit_rows, tokenizer)
    val_ds = RuleViolationPairs(holdout_rows, tokenizer)

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    settings = dict(
        output_dir=output_dir,
        eval_strategy=IntervalStrategy.EPOCH,
        save_strategy=IntervalStrategy.EPOCH,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=2,
        weight_decay=0.01,
        # Keep the checkpoint with the lowest validation loss.
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
    )
    training_args = TrainingArguments(**settings)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    trainer.train()

    # Persist model and tokenizer side by side so one directory reloads both.
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"✅ Model saved to {output_dir}")

    return model, tokenizer

In [5]:
def predict_violation(model, tokenizer, df_test, device="cuda"):
    """Score each test row by averaging class-1 probabilities over its examples.

    For every row the formatted subreddit/rule/target string is paired with
    each usable positive and negative example. All pairs of a row are encoded
    together and sent through ``model`` in a single forward pass, and the mean
    softmax probability of class 1 becomes the row's prediction. Rows without
    any usable example receive a neutral ``0.5``.

    NOTE(review): scores for positive and negative examples are averaged
    together without inverting the negative ones -- confirm this matches how
    the model's labels were defined at training time.

    Args:
        model: Sequence-classification model; called as ``model(**inputs)``
            and expected to return an object with a ``logits`` attribute.
        tokenizer: Tokenizer accepting parallel lists of first/second segments
            and returning an object with ``.to(device)``.
        df_test: DataFrame with ``rule`` and ``body`` columns, plus optional
            ``subreddit`` and ``positive_example_1/2`` /
            ``negative_example_1/2`` columns.
        device: Torch device for inference. If a CUDA device is requested but
            CUDA is unavailable, inference falls back to ``"cpu"`` with a
            warning instead of raising.

    Returns:
        A list with one Python ``float`` per row of ``df_test``, in row order.
    """
    import warnings

    # The default is "cuda"; degrade gracefully on CPU-only machines.
    if str(device).startswith("cuda") and not torch.cuda.is_available():
        warnings.warn(
            f"CUDA device {device!r} requested but unavailable; using CPU instead.",
            RuntimeWarning,
            stacklevel=2,
        )
        device = "cpu"

    model.to(device)
    model.eval()

    example_columns = (
        "positive_example_1",
        "positive_example_2",
        "negative_example_1",
        "negative_example_2",
    )

    preds = []
    for _, r in df_test.iterrows():
        rule = r["rule"]
        target = r["body"]
        subreddit = str(r.get("subreddit", "")).strip()

        candidates = [r.get(column) for column in example_columns]
        examples = [ex for ex in candidates if isinstance(ex, str) and ex.strip()]

        # If no examples, fall back to a neutral 0.5 prediction.
        if not examples:
            preds.append(0.5)
            continue

        rule_target = f"Subreddit: {subreddit} || Rule: {rule} || Target: {target}"

        with torch.no_grad():
            # One batched forward pass per row instead of one per example.
            inputs = tokenizer(
                [rule_target] * len(examples),
                examples,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=256,
            ).to(device)
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)[:, 1]

        # Average in float64 and store a plain float so the list is homogeneous.
        preds.append(float(np.mean(probs.tolist())))

    return preds

In [6]:
# train_path = "/kaggle/input/jigsaw-agile-community-rules/train.csv"
# df_train = load_data(train_path)

In [7]:
# df_train.head(2)

In [8]:
# Load the training table when run as a script or notebook cell. Later
# top-level statements read ``df_train``, so they rely on this branch having
# executed.
if __name__ == "__main__":
    train_path = "/kaggle/input/jigsaw-agile-community-rules/train.csv"  # path to your dataset
    df_train = load_data(train_path)

    # One-call alternative to the step-by-step cells that follow:
    # model, tokenizer = train_model(df_train)

In [9]:
# Step-by-step settings; the names and values mirror train_model()'s
# parameters and defaults.
train_df = df_train
model_name="microsoft/deberta-v3-base"
output_dir="saved_model"

In [10]:
# Load the tokenizer that belongs to the checkpoint named by model_name.
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



In [11]:
# Hold out 5% of the rows for validation (train_model() uses 10%); the fixed
# random_state keeps the split reproducible.
train_split, val_split = train_test_split(train_df, test_size=0.05, random_state=42)

In [12]:
# Notebook display only: (rows, columns) of the training and validation frames.
train_split.shape, val_split.shape

((1927, 9), (102, 9))

In [13]:
# Expand each split into (text_a, example, label) samples; max_len stays at
# the class default of 256.
train_ds = RuleViolationPairs(train_split, tokenizer)
val_ds = RuleViolationPairs(val_split, tokenizer)

In [14]:
# Load the checkpoint as a sequence classifier with two output labels.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# Same hyper-parameters as inside train_model(), plus report_to=[].
training_args = TrainingArguments(
        output_dir=output_dir,
        # Evaluate and checkpoint at the end of every epoch.
        eval_strategy=IntervalStrategy.EPOCH,
        save_strategy=IntervalStrategy.EPOCH,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=2,
        weight_decay=0.01,
        load_best_model_at_end=True,
        # Empty list: presumably turns off external experiment trackers -- TODO confirm.
        report_to=[],
        # "Best" checkpoint = lowest validation loss.
        metric_for_best_model="eval_loss",
        greater_is_better=False
    )

In [16]:
def compute_metrics(eval_pred):
    """Compute validation ROC AUC from raw logits.

    Args:
        eval_pred: ``(logits, labels)`` pair; ``logits`` is array-like with
            one score per class in the last dimension.

    Returns:
        ``{"roc_auc": value}``, where the ranking score is the softmax
        probability of class index 1.
    """
    raw_logits, gold = eval_pred
    class_probs = torch.softmax(torch.tensor(raw_logits), dim=-1)
    positive_scores = class_probs[:, 1].numpy()
    return {"roc_auc": roc_auc_score(gold, positive_scores)}

In [17]:
# Wire model, arguments, datasets and the ROC-AUC metric into a Trainer.
# NOTE(review): the captured cell output shows a warning pointing at this
# Trainer(...) call; presumably the `tokenizer=` argument is deprecated in the
# installed transformers version -- confirm before changing it.
trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

  trainer = Trainer(


In [18]:
# Fine-tune, then write the model and the tokenizer into output_dir so both
# can be reloaded from that single directory.
trainer.train()
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"✅ Model saved to {output_dir}")



Epoch,Training Loss,Validation Loss,Roc Auc
1,No log,0.122281,0.994738
2,0.312300,0.090279,0.997861




✅ Model saved to saved_model


In [19]:
# training_args = TrainingArguments(
#         output_dir=output_dir,
#         eval_strategy=IntervalStrategy.EPOCH,
#         save_strategy=IntervalStrategy.EPOCH,
#         learning_rate=2e-5,
#         per_device_train_batch_size=8,
#         per_device_eval_batch_size=8,
#         num_train_epochs=2,
#         weight_decay=0.01,
#         load_best_model_at_end=True,
#         metric_for_best_model="eval_loss",
#         greater_is_better=False
#     )

In [20]:
# def compute_metrics(eval_pred):
#         logits, labels = eval_pred
#         probs = torch.softmax(torch.tensor(logits), dim=-1)[:, 1].numpy()
#         auc = roc_auc_score(labels, probs)
#         return {"roc_auc": auc}

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_ds,
#     eval_dataset=val_ds,
#     tokenizer=tokenizer,
#     compute_metrics=compute_metrics,
# )

# trainer.train()

In [21]:
# trainer.save_model(output_dir)
# tokenizer.save_pretrained(output_dir)

# print(f"✅ Model saved to {output_dir}")