In [1]:
import pandas as pd

# Load your dataset
df = pd.read_csv("words.csv")

In [2]:
df.head()

Unnamed: 0,risky_words,moderate_word,non_risky_words
0,password,project_plan,apple
1,passcode,project_update,banana
2,secret,project_outline,orange
3,confidential,project_brief,grape
4,classified,project_notes,mango


In [3]:
high_templates = [
    "Critical alert: '{word}' detected — immediate action required.",
    "Severe security breach involving '{word}' found in the system.",
    "Evidence of '{word}' poses a serious threat to operations.",
    "Urgent: '{word}' could lead to catastrophic data loss.",
    "'{word}' identified — potential for major financial and reputational damage.",
    "High‑priority incident: '{word}' may compromise core infrastructure.",
    "Detected '{word}' — situation classified as critical.",
    "Security systems report '{word}' with potential for widespread impact.",
    "Immediate containment advised — '{word}' present in sensitive modules.",
    "System integrity at risk due to '{word}' occurrence."
]

moderate_templates = [
    "Warning: '{word}' may cause issues if left unresolved.",
    "The use of '{word}' could lead to performance or compliance concerns.",
    "Caution: '{word}' might introduce vulnerabilities over time.",
    "Review recommended — '{word}' may not meet current best practices.",
    "'{word}' detected — requires monitoring and possible mitigation.",
    "Potential issue: '{word}' could affect non‑critical systems.",
    "Detected '{word}' — advisable to schedule a security review.",
    "Operational note: '{word}' may degrade efficiency if ignored.",
    "Audit finding: '{word}' should be addressed in upcoming maintenance.",
    "Minor irregularity involving '{word}' — attention suggested."
]

low_templates = [
    "Note: '{word}' is present but poses no immediate concern.",
    "Informational: '{word}' found — considered safe under current conditions.",
    "'{word}' detected — no action needed at this time.",
    "Routine check: '{word}' is compliant with standards.",
    "'{word}' appears in the document — harmless in current context.",
    "Detected '{word}' — no operational impact expected.",
    "System log shows '{word}' — within acceptable parameters.",
    "Observation: '{word}' is part of normal configuration.",
    "No threat detected from '{word}' in current environment.",
    "Standard usage of '{word}' confirmed — no intervention required."
]

In [4]:
import random

def generate_sentence(word, templates):
    return random.choice(templates).format(word=word)

risky_df = pd.DataFrame({
    "text": df["risky_words"].apply(lambda x: generate_sentence(x, high_templates)),
    "label": [0] * len(df)
})

moderate_df = pd.DataFrame({
    "text": df["moderate_word"].apply(lambda x: generate_sentence(x, moderate_templates)),
    "label": [1] * len(df)
})

low_df = pd.DataFrame({
    "text": df["non_risky_words"].apply(lambda x: generate_sentence(x, low_templates)),
    "label": [2] * len(df)
})

# Combine all
full_df = pd.concat([risky_df, moderate_df, low_df], ignore_index=True)

# Shuffle for training
full_df = full_df.sample(frac=1).reset_index(drop=True)

# Preview
print(full_df.head())

                                                text  label
0  'table_runner' appears in the document — harml...      2
1  Caution: 'unit_reference_summary' might introd...      1
2  Operational note: 'project_update' may degrade...      1
3  Severe security breach involving 'db_backup_ke...      0
4  Note: 'stem' is present but poses no immediate...      2


In [7]:
from transformers import DistilBertTokenizerFast

# Load pretrained tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased", local_files_only=True
)

In [8]:
# Tokenize the text column
encodings = tokenizer(
    full_df["text"].tolist(),
    truncation=True,
    padding=True,
    max_length=64,  # You can adjust this based on sentence length
    return_tensors="pt"
)

In [9]:
import torch

labels = torch.tensor(full_df["label"].tolist())

In [10]:
from torch.utils.data import TensorDataset

# Combine into a PyTorch dataset
dataset = TensorDataset(
    encodings["input_ids"],
    encodings["attention_mask"],
    labels
)

In [12]:
from transformers import DistilBertForSequenceClassification

# Load model with 3 output labels (High, Moderate, Low Risk)
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    local_files_only=True,
    num_labels=3
)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'pre_classi

In [13]:
from torch.utils.data import random_split

# 80/20 split
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

In [14]:
from transformers import TrainingArguments
import transformers

print(transformers.__version__)
print(TrainingArguments.__module__)

4.17.0
transformers.training_args


In [15]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./test_model",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)

print("TrainingArguments created successfully!")

TrainingArguments created successfully!


In [16]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="macro")
    return {
        "accuracy": acc,
        "f1_macro": f1
    }

In [17]:
from transformers import Trainer

def tuple_to_dict_collator(features):
    input_ids, attention_mask, labels = zip(*features)
    return {
        "input_ids": torch.stack(input_ids),
        "attention_mask": torch.stack(attention_mask),
        "labels": torch.stack(labels)
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    data_collator=tuple_to_dict_collator
)

In [18]:
trainer.train()

***** Running training *****
  Num examples = 2700
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 338


  0%|          | 0/338 [00:00<?, ?it/s]

{'loss': 1.0413, 'learning_rate': 4.85207100591716e-05, 'epoch': 0.03}
{'loss': 0.667, 'learning_rate': 4.7041420118343196e-05, 'epoch': 0.06}
{'loss': 0.2767, 'learning_rate': 4.556213017751479e-05, 'epoch': 0.09}
{'loss': 0.0779, 'learning_rate': 4.408284023668639e-05, 'epoch': 0.12}
{'loss': 0.0233, 'learning_rate': 4.260355029585799e-05, 'epoch': 0.15}
{'loss': 0.0115, 'learning_rate': 4.112426035502959e-05, 'epoch': 0.18}
{'loss': 0.0068, 'learning_rate': 3.964497041420119e-05, 'epoch': 0.21}
{'loss': 0.0053, 'learning_rate': 3.8165680473372784e-05, 'epoch': 0.24}
{'loss': 0.0041, 'learning_rate': 3.668639053254438e-05, 'epoch': 0.27}
{'loss': 0.0036, 'learning_rate': 3.520710059171598e-05, 'epoch': 0.3}
{'loss': 0.0032, 'learning_rate': 3.3727810650887574e-05, 'epoch': 0.33}
{'loss': 0.0029, 'learning_rate': 3.224852071005917e-05, 'epoch': 0.36}
{'loss': 0.0024, 'learning_rate': 3.0769230769230774e-05, 'epoch': 0.38}
{'loss': 0.0021, 'learning_rate': 2.9289940828402368e-05, 'epoc

***** Running Evaluation *****
  Num examples = 675
  Batch size = 8


  0%|          | 0/85 [00:00<?, ?it/s]

Saving model checkpoint to ./test_model\checkpoint-338
Configuration saved in ./test_model\checkpoint-338\config.json


{'eval_loss': 0.0007498149643652141, 'eval_accuracy': 1.0, 'eval_f1_macro': 1.0, 'eval_runtime': 1.0141, 'eval_samples_per_second': 665.643, 'eval_steps_per_second': 83.822, 'epoch': 1.0}


Model weights saved in ./test_model\checkpoint-338\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./test_model\checkpoint-338 (score: 1.0).


{'train_runtime': 31.546, 'train_samples_per_second': 85.589, 'train_steps_per_second': 10.715, 'train_loss': 0.063754687129834, 'epoch': 1.0}


TrainOutput(global_step=338, training_loss=0.063754687129834, metrics={'train_runtime': 31.546, 'train_samples_per_second': 85.589, 'train_steps_per_second': 10.715, 'train_loss': 0.063754687129834, 'epoch': 1.0})

In [19]:
results = trainer.evaluate()
print("Final Evaluation:", results)

***** Running Evaluation *****
  Num examples = 675
  Batch size = 8


  0%|          | 0/85 [00:00<?, ?it/s]

Final Evaluation: {'eval_loss': 0.0007498149643652141, 'eval_accuracy': 1.0, 'eval_f1_macro': 1.0, 'eval_runtime': 1.1145, 'eval_samples_per_second': 605.655, 'eval_steps_per_second': 76.268, 'epoch': 1.0}


In [20]:
predictions = trainer.predict(test_dataset)
pred_labels = predictions.predictions.argmax(axis=1)

# Compare with true labels
true_labels = predictions.label_ids

# Confusion matrix
from sklearn.metrics import confusion_matrix
print(confusion_matrix(true_labels, pred_labels))

***** Running Prediction *****
  Num examples = 675
  Batch size = 8


  0%|          | 0/85 [00:00<?, ?it/s]

[[213   0   0]
 [  0 240   0]
 [  0   0 222]]


In [22]:
import random

# Alternate templates
templates = [
    "Detected '{word}' — advisable to schedule a security review.",
"Operational note: '{word}' may degrade efficiency if ignored.",
"Observation: '{word}' could lead to minor performance issues.",
"Audit finding: '{word}' should be addressed in upcoming maintenance.",
"System check: '{word}' may require configuration adjustments.",
"Report indicates '{word}' could cause intermittent disruptions.",
"Inspection shows '{word}' present — monitor for changes.",
"Detected '{word}' — recommend follow‑up in next review cycle.",
"Maintenance note: '{word}' might impact secondary processes.",
"Analysis suggests '{word}' could introduce avoidable overhead."
]

# Simple noise words
noise_words = ["please", "urgent", "note", "xyz123", "check", "randomword"]

# Risk label mapping
label_map = {0: "high", 1: "moderate", 2: "low"}

def stress_sentence(word):
    # Pick a random template
    sentence = random.choice(templates).format(word=word)
    # Randomly inject noise
    if random.random() < 0.5:
        insert_pos = random.randint(0, len(sentence.split()))
        words = sentence.split()
        words.insert(insert_pos, random.choice(noise_words))
        sentence = " ".join(words)
    return sentence

# Build stress test dataset from original test set
stress_texts = []
stress_labels = []

for i in range(len(test_dataset)):
    # Extract original tuple from TensorDataset
    input_ids, attention_mask, label = test_dataset[i]
    # Get the original word from your full_df (optional: store mapping earlier)
    # For now, we'll just simulate with placeholder words
    word = f"term{i}"  # Replace with actual mapping if available
    # risk_str = label_map[int(label)]
    stress_texts.append(stress_sentence(word))
    stress_labels.append(int(label))

# Tokenize stress test set
stress_encodings = tokenizer(
    stress_texts,
    truncation=True,
    padding=True,
    max_length=64,
    return_tensors="pt"
)

from torch.utils.data import TensorDataset
stress_dataset = TensorDataset(
    stress_encodings["input_ids"],
    stress_encodings["attention_mask"],
    torch.tensor(stress_labels)
)

# Evaluate on stress test
stress_results = trainer.evaluate(stress_dataset)
print("Stress Test Evaluation:", stress_results)

***** Running Evaluation *****
  Num examples = 675
  Batch size = 8


Stress Test Evaluation: {'eval_loss': 4.38728141784668, 'eval_accuracy': 0.36, 'eval_f1_macro': 0.23266647161283785, 'eval_runtime': 1.2971, 'eval_samples_per_second': 520.391, 'eval_steps_per_second': 65.531, 'epoch': 1.0}


In [23]:
# Save model and tokenizer
model.save_pretrained("./distilbert_risk_model")
tokenizer.save_pretrained("./distilbert_risk_model")

Configuration saved in ./distilbert_risk_model\config.json
Model weights saved in ./distilbert_risk_model\pytorch_model.bin
tokenizer config file saved in ./distilbert_risk_model\tokenizer_config.json
Special tokens file saved in ./distilbert_risk_model\special_tokens_map.json


('./distilbert_risk_model\\tokenizer_config.json',
 './distilbert_risk_model\\special_tokens_map.json',
 './distilbert_risk_model\\vocab.txt',
 './distilbert_risk_model\\added_tokens.json',
 './distilbert_risk_model\\tokenizer.json')

In [34]:
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch

# Load the saved model and tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("./distilbert_risk_model")
model = DistilBertForSequenceClassification.from_pretrained("./distilbert_risk_model")

# Map numeric labels back to human-readable classes
label_map = {0: "High Risk", 1: "Moderate Risk", 2: "Low Risk"}

def classify_text(text):
    # Tokenize the input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=64)
    
    # Run inference without gradient calculation
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_class_id = torch.argmax(outputs.logits, dim=1).item()
    
    return label_map[predicted_class_id]

# Test with any sentence
print(classify_text("I am Sayan Ghosh"))
print(classify_text("Confidential: Military Files"))
print(classify_text("Backdoor detected, requires monitoring and possible mitigation"))
print(classify_text("Potential issue: not required much attention"))
print(classify_text("department_plan is next week"))

Didn't find file ./distilbert_risk_model\added_tokens.json. We won't load it.
loading file ./distilbert_risk_model\vocab.txt
loading file ./distilbert_risk_model\tokenizer.json
loading file None
loading file ./distilbert_risk_model\special_tokens_map.json
loading file ./distilbert_risk_model\tokenizer_config.json
loading configuration file ./distilbert_risk_model\config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "single_label_classification",
  "qa_dropout": 0.1

Low Risk
High Risk
Moderate Risk
Moderate Risk
Low Risk


In [27]:
preds = trainer.predict(test_dataset)
pred_labels = preds.predictions.argmax(axis=1)
print("Unique predicted classes:", set(pred_labels.tolist()))

***** Running Prediction *****
  Num examples = 675
  Batch size = 8


  0%|          | 0/85 [00:00<?, ?it/s]

Unique predicted classes: {0, 1, 2}


In [28]:
from collections import Counter
preds = trainer.predict(test_dataset)
pred_labels = preds.predictions.argmax(axis=1)
print(Counter(pred_labels.tolist()))

***** Running Prediction *****
  Num examples = 675
  Batch size = 8


Counter({1: 240, 2: 222, 0: 213})
