In [None]:
!pip install -q transformers datasets scikit-learn torch pandas

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m487.4/487.4 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m66.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m49.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os

from google.colab import files

# Only prompt for an upload when the dataset is not already present in the
# working directory, so that re-running the notebook top-to-bottom does not
# block on a file picker. `uploaded` stays defined either way.
if os.path.exists("question_dataset_filtered.csv"):
    uploaded = {}
else:
    uploaded = files.upload()

Saving question_dataset_filtered.csv to question_dataset_filtered.csv


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from transformers import (
    BertTokenizer, BertForSequenceClassification, AdamW,
    get_scheduler, logging
)

# Disable transformers logging
logging.set_verbosity_error()

# Load dataset and drop rows that are exact duplicates in every column.
# NOTE(review): rows sharing the same text but differing in another column
# are kept by drop_duplicates() -- confirm that is intended.
df = pd.read_csv("question_dataset_filtered.csv")
df = df.drop_duplicates()

# Encode the string labels as integers. Series.map() turns every value that
# is missing from the mapping (including NaN) into NaN, which would only
# surface later as a confusing error, so validate here and fail loudly.
label_to_id = {'acceptable': 1, 'not_acceptable': 0}
encoded_labels = df['label'].map(label_to_id)
if encoded_labels.isna().any():
    bad_values = df.loc[encoded_labels.isna(), 'label'].unique().tolist()
    raise ValueError(f"Unexpected values in 'label' column: {bad_values}")
df['label'] = encoded_labels.astype(int)

# Stratified split, step 1: hold out 10% of the rows as the test set.
df_train_val, df_test = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df['label'],
)
test_texts, test_labels = df_test['text'].tolist(), df_test['label'].tolist()

# Stratified split, step 2: divide the remaining rows 80/20 into
# training and validation lists.
remaining_texts = df_train_val['text'].tolist()
remaining_labels = df_train_val['label'].tolist()
train_texts, val_texts, train_labels, val_labels = train_test_split(
    remaining_texts,
    remaining_labels,
    test_size=0.2,
    random_state=42,
    stratify=df_train_val['label'],
)

# Tokenization: every split is encoded with the same tokenizer settings.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer_kwargs = dict(truncation=True, padding=True, max_length=128)
train_encodings = tokenizer(train_texts, **tokenizer_kwargs)
val_encodings = tokenizer(val_texts, **tokenizer_kwargs)
test_encodings = tokenizer(test_texts, **tokenizer_kwargs)

# Dataset class
class QuestionDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with labels.

    `encodings` is a mapping from field name to an indexable collection of
    per-example values; `labels` is an indexable collection of labels.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the number of labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert every encoding field of example `idx` to a tensor, then
        # attach the label under the 'labels' key.
        item = {}
        for name, values in self.encodings.items():
            item[name] = torch.tensor(values[idx])
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Wrap each split's encodings and labels in a QuestionDataset.
train_dataset, val_dataset, test_dataset = (
    QuestionDataset(split_encodings, split_labels)
    for split_encodings, split_labels in (
        (train_encodings, train_labels),
        (val_encodings, val_labels),
        (test_encodings, test_labels),
    )
)

# Model setup
# Seed PyTorch's random number generators before anything stochastic runs
# (weight initialisation, dropout, DataLoader shuffling) so that repeated
# runs of the notebook are comparable.
torch.manual_seed(42)

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Derive 'balanced' per-class weights from the training labels and feed
# them to the cross-entropy loss used during training.
observed_classes = np.unique(train_labels)
class_weights = compute_class_weight(class_weight='balanced', classes=observed_classes, y=train_labels)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)

# Optimizer and scheduler
# Single source of truth for the epoch count: it drives both the length of
# the learning-rate schedule and the training loop below.
NUM_EPOCHS = 3

# The AdamW class exported by transformers is deprecated in favour of the
# PyTorch implementation. eps and weight_decay are set explicitly to the
# deprecated class's defaults so the optimisation settings are unchanged.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)
num_training_steps = len(train_loader) * NUM_EPOCHS
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Training loop
model.train()
for epoch in range(NUM_EPOCHS):
    total_loss = 0
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        # Keep the labels out of the forward call: the model would otherwise
        # compute its own loss, which is discarded in favour of the
        # class-weighted loss below.
        labels = batch.pop('labels')
        outputs = model(**batch)
        logits = outputs.logits
        loss = loss_fn(logits, labels)  # Use weighted loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {epoch + 1} Loss: {total_loss / len(train_loader):.4f}")

def _collect_predictions(loader):
    """Run the model over every batch of `loader` without gradients.

    Returns a `(predictions, true_labels)` pair of lists holding the argmax
    class index and the reference label for each example, in loader order.
    """
    model.eval()
    collected_preds, collected_labels = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            collected_preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
            collected_labels.extend(batch['labels'].cpu().numpy())
    return collected_preds, collected_labels


# Class ids and display names, in matching order. Passing the ids explicitly
# keeps the names aligned even if a split happens to lack one class.
LABEL_IDS = [0, 1]
LABEL_NAMES = ["not_acceptable", "acceptable"]

# Validation evaluation
predictions, true_labels = _collect_predictions(val_loader)
print("\n📊 Validation Set Evaluation:")
print(classification_report(true_labels, predictions, labels=LABEL_IDS, target_names=LABEL_NAMES))

# Test set evaluation
predictions, true_labels = _collect_predictions(test_loader)
print("\n🧪 Held-Out Test Set Evaluation:")
print(classification_report(true_labels, predictions, labels=LABEL_IDS, target_names=LABEL_NAMES))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]



Epoch 1 Loss: 0.0198
Epoch 2 Loss: 0.0002
Epoch 3 Loss: 0.0001

📊 Validation Set Evaluation:
                precision    recall  f1-score   support

not_acceptable       1.00      1.00      1.00       196
    acceptable       1.00      1.00      1.00      1206

      accuracy                           1.00      1402
     macro avg       1.00      1.00      1.00      1402
  weighted avg       1.00      1.00      1.00      1402


🧪 Held-Out Test Set Evaluation:
                precision    recall  f1-score   support

not_acceptable       1.00      1.00      1.00       109
    acceptable       1.00      1.00      1.00       670

      accuracy                           1.00       779
     macro avg       1.00      1.00      1.00       779
  weighted avg       1.00      1.00      1.00       779



In [None]:
from transformers import BertTokenizer

# Target directories for the fine-tuned model and its tokenizer.
model_save_path, tokenizer_save_path = "saved_bert_model", "saved_bert_tokenizer"

# Save model and tokenizer (model first, then tokenizer) before reporting.
for artifact, target_dir in ((model, model_save_path), (tokenizer, tokenizer_save_path)):
    artifact.save_pretrained(target_dir)

print(f"✅ Model saved to: {model_save_path}")
print(f"✅ Tokenizer saved to: {tokenizer_save_path}")


✅ Model saved to: saved_bert_model
✅ Tokenizer saved to: saved_bert_tokenizer


In [None]:
!pip install -q huggingface_hub

In [None]:
import os

from huggingface_hub import login

# Never paste an access token into the notebook source. Take it from the
# HF_TOKEN environment variable when set; otherwise pass None so that
# login() asks for the token interactively.
login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
from transformers import BertForSequenceClassification, BertTokenizer

# Destination repository on the Hugging Face Hub (replace with your username/model-name)
hub_repo_name = "Sajeevan2001/bert-question-moderation"

# Reload both artifacts from the saved directories (optional if already in memory)
model = BertForSequenceClassification.from_pretrained("saved_bert_model")
tokenizer = BertTokenizer.from_pretrained("saved_bert_tokenizer")

# Upload the model first, then the tokenizer, to the same repository.
for artifact in (model, tokenizer):
    artifact.push_to_hub(hub_repo_name)

print(f"🚀 Successfully pushed to https://huggingface.co/{hub_repo_name}")


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

🚀 Successfully pushed to https://huggingface.co/Sajeevan2001/bert-question-moderation


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Run on the GPU when available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reload the fine-tuned model and tokenizer from the saved directories and
# put the model into evaluation mode on the chosen device.
model = BertForSequenceClassification.from_pretrained("saved_bert_model")
tokenizer = BertTokenizer.from_pretrained("saved_bert_tokenizer")
model.to(device)
model.eval()

# Class index -> human-readable label.
label_map = {0: "not_acceptable", 1: "acceptable"}

# Hand-written examples used as a quick sanity check.
custom_texts = [
    "How can I cheat on my final exam undetected?",
    "What is the best method to study for biology exams?",
    "How to fake documents to get into clubs?",
    "Explain Newton’s laws of motion.",
    "How to make a harmless prank at school?",
    "Steps to cause maximum damage without being caught.",
]

# Tokenize the examples as one padded batch of PyTorch tensors and move
# every tensor to the model's device.
custom_encodings = {
    name: tensor.to(device)
    for name, tensor in tokenizer(
        custom_texts,
        truncation=True,
        padding=True,
        max_length=128,
        return_tensors='pt',
    ).items()
}

# Forward pass without gradient tracking; the predicted class is the index
# of the largest logit for each example.
with torch.no_grad():
    outputs = model(**custom_encodings)
    logits = outputs.logits
    predictions = logits.argmax(dim=1).cpu().numpy()

# Show each input next to its predicted label.
for text, pred in zip(custom_texts, predictions):
    print(f"\n📝 Text: {text}\n➡️ Predicted label: {label_map[pred]}")



📝 Text: How can I cheat on my final exam undetected?
➡️ Predicted label: not_acceptable

📝 Text: What is the best method to study for biology exams?
➡️ Predicted label: acceptable

📝 Text: How to fake documents to get into clubs?
➡️ Predicted label: not_acceptable

📝 Text: Explain Newton’s laws of motion.
➡️ Predicted label: acceptable

📝 Text: How to make a harmless prank at school?
➡️ Predicted label: not_acceptable

📝 Text: Steps to cause maximum damage without being caught.
➡️ Predicted label: not_acceptable
