In [1]:
import pandas as pd

# Load your dataset
df = pd.read_csv("words.csv")

In [2]:
df.head()

Unnamed: 0,risky_words,moderate_word,non_risky_words
0,password,project_plan,apple
1,passcode,project_update,banana
2,secret,project_outline,orange
3,confidential,project_brief,grape
4,classified,project_notes,mango


In [3]:
def generate_sentence(word, risk_level):
    return f"This document contains the term '{word}', which may indicate {risk_level} risk."

# Apply to each column
df["sentence_risky"] = df["risky_words"].apply(lambda x: generate_sentence(x, "high"))
df["sentence_moderate"] = df["moderate_word"].apply(lambda x: generate_sentence(x, "moderate"))
df["sentence_low"] = df["non_risky_words"].apply(lambda x: generate_sentence(x, "low"))

In [4]:
# Create labeled datasets
risky_df = pd.DataFrame({
    "text": df["sentence_risky"],
    "label": [0] * len(df)  # High Risk = 0
})

moderate_df = pd.DataFrame({
    "text": df["sentence_moderate"],
    "label": [1] * len(df)  # Moderate Risk = 1
})

low_df = pd.DataFrame({
    "text": df["sentence_low"],
    "label": [2] * len(df)  # Low Risk = 2
})

# Combine all
full_df = pd.concat([risky_df, moderate_df, low_df], ignore_index=True)

# Shuffle for training
full_df = full_df.sample(frac=1).reset_index(drop=True)

# Preview
print(full_df.head())

                                                text  label
0  This document contains the term 'customer_segm...      0
1  This document contains the term 'db_service', ...      0
2  This document contains the term 'policy_handbo...      1
3  This document contains the term 'pickle', whic...      2
4  This document contains the term 'taco', which ...      2


In [5]:
from transformers import DistilBertTokenizerFast

# Load pretrained tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [6]:
# Tokenize the text column
encodings = tokenizer(
    full_df["text"].tolist(),
    truncation=True,
    padding=True,
    max_length=64,  # You can adjust this based on sentence length
    return_tensors="pt"
)

In [7]:
import torch

labels = torch.tensor(full_df["label"].tolist())

In [8]:
from torch.utils.data import TensorDataset

# Combine into a PyTorch dataset
dataset = TensorDataset(
    encodings["input_ids"],
    encodings["attention_mask"],
    labels
)

In [9]:
from transformers import DistilBertForSequenceClassification

# Load model with 3 output labels (High, Moderate, Low Risk)
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=3
)

Downloading:   0%|          | 0.00/256M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'pre_classi

In [10]:
from torch.utils.data import random_split

# 80/20 split
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

In [11]:
from transformers import TrainingArguments
import transformers

print(transformers.__version__)
print(TrainingArguments.__module__)

4.17.0
transformers.training_args


In [12]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./test_model",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)

print("TrainingArguments created successfully!")

TrainingArguments created successfully!


In [13]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="macro")
    return {
        "accuracy": acc,
        "f1_macro": f1
    }

In [16]:
from transformers import Trainer

def tuple_to_dict_collator(features):
    input_ids, attention_mask, labels = zip(*features)
    return {
        "input_ids": torch.stack(input_ids),
        "attention_mask": torch.stack(attention_mask),
        "labels": torch.stack(labels)
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    data_collator=tuple_to_dict_collator
)

In [17]:
trainer.train()

***** Running training *****
  Num examples = 2700
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 338


  0%|          | 0/338 [00:00<?, ?it/s]

{'loss': 1.0978, 'learning_rate': 4.85207100591716e-05, 'epoch': 0.03}
{'loss': 0.9298, 'learning_rate': 4.7041420118343196e-05, 'epoch': 0.06}
{'loss': 0.5561, 'learning_rate': 4.556213017751479e-05, 'epoch': 0.09}
{'loss': 0.1352, 'learning_rate': 4.408284023668639e-05, 'epoch': 0.12}
{'loss': 0.0321, 'learning_rate': 4.260355029585799e-05, 'epoch': 0.15}
{'loss': 0.0129, 'learning_rate': 4.112426035502959e-05, 'epoch': 0.18}
{'loss': 0.0075, 'learning_rate': 3.964497041420119e-05, 'epoch': 0.21}
{'loss': 0.0055, 'learning_rate': 3.8165680473372784e-05, 'epoch': 0.24}
{'loss': 0.0045, 'learning_rate': 3.668639053254438e-05, 'epoch': 0.27}
{'loss': 0.0036, 'learning_rate': 3.520710059171598e-05, 'epoch': 0.3}
{'loss': 0.0031, 'learning_rate': 3.3727810650887574e-05, 'epoch': 0.33}
{'loss': 0.003, 'learning_rate': 3.224852071005917e-05, 'epoch': 0.36}
{'loss': 0.0025, 'learning_rate': 3.0769230769230774e-05, 'epoch': 0.38}
{'loss': 0.0022, 'learning_rate': 2.9289940828402368e-05, 'epoc

***** Running Evaluation *****
  Num examples = 675
  Batch size = 8


  0%|          | 0/85 [00:00<?, ?it/s]

Saving model checkpoint to ./test_model\checkpoint-338
Configuration saved in ./test_model\checkpoint-338\config.json


{'eval_loss': 0.000771350518334657, 'eval_accuracy': 1.0, 'eval_f1_macro': 1.0, 'eval_runtime': 1.1965, 'eval_samples_per_second': 564.165, 'eval_steps_per_second': 71.043, 'epoch': 1.0}


Model weights saved in ./test_model\checkpoint-338\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./test_model\checkpoint-338 (score: 1.0).


{'train_runtime': 33.6705, 'train_samples_per_second': 80.189, 'train_steps_per_second': 10.038, 'train_loss': 0.08354047423548247, 'epoch': 1.0}


TrainOutput(global_step=338, training_loss=0.08354047423548247, metrics={'train_runtime': 33.6705, 'train_samples_per_second': 80.189, 'train_steps_per_second': 10.038, 'train_loss': 0.08354047423548247, 'epoch': 1.0})

In [18]:
results = trainer.evaluate()
print("Final Evaluation:", results)

***** Running Evaluation *****
  Num examples = 675
  Batch size = 8


  0%|          | 0/85 [00:00<?, ?it/s]

Final Evaluation: {'eval_loss': 0.000771350518334657, 'eval_accuracy': 1.0, 'eval_f1_macro': 1.0, 'eval_runtime': 1.2479, 'eval_samples_per_second': 540.893, 'eval_steps_per_second': 68.112, 'epoch': 1.0}


In [19]:
predictions = trainer.predict(test_dataset)
pred_labels = predictions.predictions.argmax(axis=1)

# Compare with true labels
true_labels = predictions.label_ids

# Confusion matrix
from sklearn.metrics import confusion_matrix
print(confusion_matrix(true_labels, pred_labels))

***** Running Prediction *****
  Num examples = 675
  Batch size = 8


  0%|          | 0/85 [00:00<?, ?it/s]

[[225   0   0]
 [  0 203   0]
 [  0   0 247]]


In [20]:
import random

# Alternate templates
templates = [
    "The presence of '{word}' in this file suggests {risk} risk.",
    "Detected term: '{word}' — classified as {risk} risk.",
    "This text contains '{word}', indicating a {risk} risk category.",
    "We found '{word}' here, which could mean {risk} risk.",
    "'{word}' appears in the document, suggesting {risk} risk."
]

# Simple noise words
noise_words = ["please", "urgent", "note", "xyz123", "check", "randomword"]

# Risk label mapping
label_map = {0: "high", 1: "moderate", 2: "low"}

def stress_sentence(word, risk):
    # Pick a random template
    sentence = random.choice(templates).format(word=word, risk=risk)
    # Randomly inject noise
    if random.random() < 0.5:
        insert_pos = random.randint(0, len(sentence.split()))
        words = sentence.split()
        words.insert(insert_pos, random.choice(noise_words))
        sentence = " ".join(words)
    return sentence

# Build stress test dataset from original test set
stress_texts = []
stress_labels = []

for i in range(len(test_dataset)):
    # Extract original tuple from TensorDataset
    input_ids, attention_mask, label = test_dataset[i]
    # Get the original word from your full_df (optional: store mapping earlier)
    # For now, we'll just simulate with placeholder words
    word = f"term{i}"  # Replace with actual mapping if available
    risk_str = label_map[int(label)]
    stress_texts.append(stress_sentence(word, risk_str))
    stress_labels.append(int(label))

# Tokenize stress test set
stress_encodings = tokenizer(
    stress_texts,
    truncation=True,
    padding=True,
    max_length=64,
    return_tensors="pt"
)

from torch.utils.data import TensorDataset
stress_dataset = TensorDataset(
    stress_encodings["input_ids"],
    stress_encodings["attention_mask"],
    torch.tensor(stress_labels)
)

# Evaluate on stress test
stress_results = trainer.evaluate(stress_dataset)
print("Stress Test Evaluation:", stress_results)

***** Running Evaluation *****
  Num examples = 675
  Batch size = 8


Stress Test Evaluation: {'eval_loss': 0.000859028659760952, 'eval_accuracy': 1.0, 'eval_f1_macro': 1.0, 'eval_runtime': 1.095, 'eval_samples_per_second': 616.426, 'eval_steps_per_second': 77.624, 'epoch': 1.0}


In [21]:
# Save model and tokenizer
model.save_pretrained("./distilbert_risk_model")
tokenizer.save_pretrained("./distilbert_risk_model")

Configuration saved in ./distilbert_risk_model\config.json
Model weights saved in ./distilbert_risk_model\pytorch_model.bin
tokenizer config file saved in ./distilbert_risk_model\tokenizer_config.json
Special tokens file saved in ./distilbert_risk_model\special_tokens_map.json


('./distilbert_risk_model\\tokenizer_config.json',
 './distilbert_risk_model\\special_tokens_map.json',
 './distilbert_risk_model\\vocab.txt',
 './distilbert_risk_model\\added_tokens.json',
 './distilbert_risk_model\\tokenizer.json')

In [24]:
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch

# Load the saved model and tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("./distilbert_risk_model")
model = DistilBertForSequenceClassification.from_pretrained("./distilbert_risk_model")

# Map numeric labels back to human-readable classes
label_map = {0: "High Risk", 1: "Moderate Risk", 2: "Low Risk"}

def classify_text(text):
    # Tokenize the input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=64)
    
    # Run inference without gradient calculation
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_class_id = torch.argmax(outputs.logits, dim=1).item()
    
    return label_map[predicted_class_id]

# Test with any sentence
print(classify_text("I am Sayan Ghosh"))
print(classify_text("Confidential: Military Files"))
print(classify_text("The presence of outdated encryption protocols in this system may indicate a moderate risk to data security."))
print(classify_text("This document contains the term 'butcher'"))

Didn't find file ./distilbert_risk_model\added_tokens.json. We won't load it.
loading file ./distilbert_risk_model\vocab.txt
loading file ./distilbert_risk_model\tokenizer.json
loading file None
loading file ./distilbert_risk_model\special_tokens_map.json
loading file ./distilbert_risk_model\tokenizer_config.json
loading configuration file ./distilbert_risk_model\config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "single_label_classification",
  "qa_dropout": 0.1

Low Risk
High Risk
Moderate Risk
High Risk
