# Universal Restrictor - Model Training
Fine-tune DistilBERT for content moderation

In [None]:
# Install the third-party packages this notebook depends on; -q keeps pip's output short.
# NOTE(review): `accelerate` is never imported directly below - presumably required by Trainer; confirm.
!pip install transformers datasets accelerate scikit-learn -q

In [None]:
# Colab-only step: ask the user to upload file(s) from their machine into the runtime.
# Presumably this is where train_unified.jsonl (opened by a later cell) comes from.
# `files` is kept in scope because the last cell reuses it to download the model.
from google.colab import files
uploaded = files.upload()

In [None]:
import json
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Report which device is available: the CUDA device name, or 'CPU' when no GPU is visible.
device_label = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'
print(f"GPU: {device_label}")

In [None]:
# Load the training set: one JSON object per line, each with "text" and "label" fields.
# The encoding is given explicitly so decoding does not depend on the platform default.
with open('train_unified.jsonl', 'r', encoding='utf-8') as f:
    # Blank lines (e.g. stray newlines at the end of the file) are skipped;
    # json.loads would otherwise raise on them.
    data = [json.loads(line) for line in f if line.strip()]

print(f"Loaded {len(data)} examples")

# Map the string labels to the integer class ids the classifier is trained on.
# A label outside this map raises KeyError.
label_map = {"safe": 0, "toxic": 1}
texts = [d["text"] for d in data]
labels = [label_map[d["label"]] for d in data]

# Hold out 10% for validation. Stratifying keeps the safe/toxic ratio the same
# in both splits, and the fixed seed makes the split reproducible.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts,
    labels,
    test_size=0.1,
    random_state=42,
    stratify=labels,
)
print(f"Train: {len(train_texts)}, Val: {len(val_texts)}")

In [None]:
# Pretrained checkpoint that is fine-tuned into the safe/toxic classifier.
MODEL_NAME = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Two output classes; the id<->name mappings are passed along so the model
# carries the label names "safe" and "toxic" rather than bare class indices.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
    id2label={0: "safe", 1: "toxic"},
    label2id={"safe": 0, "toxic": 1},
)
print(f"Model loaded: {MODEL_NAME}")

In [None]:
def tokenize(texts, labels):
    """Encode raw strings with the notebook's tokenizer and wrap them in a Dataset.

    Args:
        texts: list of input strings.
        labels: list of integer class ids, aligned with ``texts``.

    Returns:
        A ``Dataset`` with ``input_ids``, ``attention_mask`` and ``labels`` columns.
        Sequences are truncated to at most 256 tokens and padded so that every
        row produced by this call has the same length.
    """
    batch = tokenizer(texts, truncation=True, padding=True, max_length=256)
    columns = {key: batch[key] for key in ("input_ids", "attention_mask")}
    columns["labels"] = labels
    return Dataset.from_dict(columns)


# Encode the two splits separately, then report their sizes.
train_dataset = tokenize(train_texts, train_labels)
val_dataset = tokenize(val_texts, val_labels)
print(f"Tokenized: Train={len(train_dataset)}, Val={len(val_dataset)}")

In [None]:
def compute_metrics(pred):
    """Turn one evaluation pass into accuracy / F1 / precision / recall.

    Args:
        pred: object exposing ``label_ids`` (true class ids) and ``predictions``
            (per-class scores; the highest-scoring class along the last axis is
            taken as the prediction). Presumably the evaluation output the
            Trainer passes to its ``compute_metrics`` callback.

    Returns:
        Dict with keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
        The last three use binary averaging, i.e. they score the positive
        class (label 1, "toxic" in this notebook's label map).
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    precision, recall, f1, _support = precision_recall_fscore_support(
        y_true, y_pred, average="binary"
    )
    accuracy = accuracy_score(y_true, y_pred)
    return {"accuracy": accuracy, "f1": f1, "precision": precision, "recall": recall}

In [None]:
# Hyper-parameters for the fine-tuning run. Evaluation and checkpointing both
# happen once per epoch so the checkpoint with the best validation F1 (the "f1"
# key returned by compute_metrics) can be restored when training finishes.
training_args = TrainingArguments(
    output_dir="./restrictor-model",
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # Mixed precision is only enabled when a CUDA device is present. Forcing
    # fp16 on a CPU-only runtime makes TrainingArguments raise, so fall back
    # to full precision there.
    fp16=torch.cuda.is_available(),
)

# Wire the model, both dataset splits and the metric callback into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)
print("Ready to train!")

In [None]:
# Run fine-tuning with the trainer configured in the previous cell.
trainer.train()

In [None]:
# Evaluate on the trainer's evaluation dataset and print the headline metrics,
# one per line, to four decimal places.
results = trainer.evaluate()
print(
    f"Accuracy: {results['eval_accuracy']:.4f}",
    f"F1 Score: {results['eval_f1']:.4f}",
    f"Precision: {results['eval_precision']:.4f}",
    f"Recall: {results['eval_recall']:.4f}",
    sep="\n",
)

In [None]:
# Write the model first, then the tokenizer, into the same export directory so
# that directory can be loaded on its own later.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./restrictor-model-final")
print("Model saved!")

In [None]:
from transformers import pipeline

# Reload the exported directory from disk (rather than reusing the in-memory
# model) so this check exercises exactly what was saved.
classifier = pipeline("text-classification", model="./restrictor-model-final")

# A few hand-picked sentences for a quick manual sanity check.
test_texts = [
    "Hello, how are you?",
    "I will kill you",
    "The weather is nice",
    "You are worthless",
]
for text in test_texts:
    # Each call returns a list; its first entry carries the label and score.
    result = classifier(text)[0]
    # Input is clipped/padded to 30 characters so the output columns line up.
    print(f"{text[:30]:30} -> {result['label']} ({result['score']:.3f})")

In [None]:
# Bundle the exported model directory into a single archive, then hand it to
# the browser as a download via the Colab `files` helper imported in the upload cell.
!zip -r restrictor-model.zip restrictor-model-final/
files.download('restrictor-model.zip')