In [1]:
from datasets import DatasetDict, load_dataset, concatenate_datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
import evaluate
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS for training.")
else:
    device = torch.device("cpu")
    print("MPS not available, using CPU.")

Using MPS for training.


In [3]:
dataset = load_dataset("ahsanayub/malicious-prompts").remove_columns(["id", "source"]).filter(lambda x: x["text"] is not None and x["label"] in [0, 1])

label_0 = dataset["train"].filter(lambda x: x["label"] == 0)
label_1 = dataset["train"].filter(lambda x: x["label"] == 1)

n_train = 10000
n_test = 3000
train_per_class = n_train // 2
test_per_class = n_test // 2

label_0 = label_0.shuffle(seed=42)
label_1 = label_1.shuffle(seed=42)

train_0 = label_0.select(range(train_per_class))
train_1 = label_1.select(range(train_per_class))
test_0  = label_0.select(range(train_per_class, train_per_class + test_per_class))
test_1  = label_1.select(range(train_per_class, train_per_class + test_per_class))

train_dataset = concatenate_datasets([train_0, train_1]).shuffle(seed=42)
test_dataset  = concatenate_datasets([test_0, test_1]).shuffle(seed=42)

dataset = DatasetDict({
    "train": train_dataset,
    "test": test_dataset
})

In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 10000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 3000
    })
})

In [5]:
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2).to(device)

for name, parameter in model.base_model.named_parameters():
    if "poller" not in name:
        parameter.requires_grad = False

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def tokenize_str(string):
    return tokenizer(string['text'], truncation=True)

tokenized_datasets = dataset.map(tokenize_str, batched=True)

Map: 100%|██████████| 3000/3000 [00:00<00:00, 7855.60 examples/s]


In [7]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [8]:
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    probabilities = np.exp(predictions) / np.exp(predictions).sum(-1, keepdims=True)
    predicted_classes = np.argmax(predictions, axis=1)
    
    precision = np.round(precision_metric.compute(predictions=predicted_classes, references=labels, average='binary')['precision'], 3)
    recall = np.round(recall_metric.compute(predictions=predicted_classes, references=labels, average='binary')['recall'], 3)
    f1 = np.round(f1_metric.compute(predictions=predicted_classes, references=labels, average='binary')['f1'], 3)
    accuracy = np.round(accuracy_metric.compute(predictions=predicted_classes, references=labels)['accuracy'], 3)
    
    return {"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}

In [9]:
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=8,
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.6898,0.68756,0.519,0.979,0.678,0.536
2,0.6766,0.669509,0.571,0.779,0.659,0.597
3,0.6687,0.663843,0.581,0.725,0.645,0.601
4,0.6662,0.660873,0.576,0.757,0.654,0.599
5,0.6634,0.668605,0.562,0.911,0.695,0.6
6,0.6625,0.65726,0.58,0.777,0.664,0.607
7,0.6581,0.656283,0.582,0.764,0.661,0.608
8,0.6572,0.656333,0.579,0.787,0.667,0.608




TrainOutput(global_step=2504, training_loss=0.667813145695403, metrics={'train_runtime': 3064.0769, 'train_samples_per_second': 26.109, 'train_steps_per_second': 0.817, 'total_flos': 2.10253688782272e+16, 'train_loss': 0.667813145695403, 'epoch': 8.0})

In [11]:
trainer.save_model("saved_model")
tokenizer.save_pretrained("saved_model")

('saved_model/tokenizer_config.json',
 'saved_model/special_tokens_map.json',
 'saved_model/vocab.txt',
 'saved_model/added_tokens.json',
 'saved_model/tokenizer.json')