In [1]:
from datasets import DatasetDict, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification,TrainingArguments, Trainer
import evaluate
import numpy as np
from transformers import DataCollatorWithPadding
from datasets import load_dataset

In [2]:
# Load the dataset published under this hub identifier. Later cells read its
# "text" column and index its "train", "test" and "validation" splits.
dataset_dict = load_dataset("shawhin/phishing-site-classification")

In [3]:
# Checkpoint identifier shared by the tokenizer and the model
model_path = "google-bert/bert-base-uncased"

# Tokenizer belonging to that checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Class-index -> label-name map, and its inverse derived from it
id2label = {0: "Safe", 1: "Not Safe"}
label2id = {label: idx for idx, label in id2label.items()}

# Sequence-classification model with one output per label. The recorded output
# of this cell reports classifier.bias / classifier.weight as newly initialized.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# One pass over the base model's parameters: a parameter stays trainable only
# if its name contains "pooler"; every other base-model parameter is frozen.
# Parameters outside model.base_model are not visited here.
for name, param in model.base_model.named_parameters():
    param.requires_grad = "pooler" in name

In [5]:
def preprocess_function(examples):
    """Tokenize the "text" entries of a batch with truncation enabled.

    Args:
        examples: Mapping that provides the raw strings under the "text" key.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those strings.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)


# Run the tokenizer over everything in dataset_dict, in batches
tokenized_data = dataset_dict.map(preprocess_function, batched=True)

Map:   0%|          | 0/450 [00:00<?, ? examples/s]

In [6]:
# Create the data collator from the same tokenizer used for preprocessing.
# It is later handed to the Trainer through its `data_collator` argument.
# NOTE(review): presumably pads each batch to its longest sequence - confirm
# against the DataCollatorWithPadding documentation.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [13]:
# Load the metric objects used by compute_metrics.
# (evaluate and numpy are imported again here; the first cell already imports both.)
import evaluate
import numpy as np

# Called as accuracy.compute(predictions=..., references=...); result read via the 'accuracy' key
accuracy = evaluate.load("accuracy")
# Called as auc_score.compute(prediction_scores=..., references=...); result read via the 'roc_auc' key
auc_score = evaluate.load("roc_auc")

def compute_metrics(eval_pred):
    """Compute accuracy and ROC AUC from logits and reference labels.

    Relies on the module-level ``accuracy`` and ``auc_score`` metric objects.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds the
            logits as an array indexed ``[row, class]``; column 1 is treated
            as the positive class. ``labels`` holds the reference class ids.

    Returns:
        dict: ``{"Accuracy": ..., "AUC": ...}``, each rounded to 3 decimals.
    """
    predictions, labels = eval_pred

    # Cast logits to float for compatibility with numpy functions
    predictions = predictions.astype(float)

    # Softmax with the row maximum subtracted first. The probabilities are
    # mathematically unchanged, but np.exp can no longer overflow to inf
    # (which would make the ratio NaN) when a logit is very large.
    shifted = predictions - predictions.max(axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)
    probabilities = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)

    # ROC AUC is scored on the probability assigned to class 1
    positive_class_probs = probabilities[:, 1]
    auc = np.round(auc_score.compute(prediction_scores=positive_class_probs, references=labels)['roc_auc'], 3)

    # Accuracy is scored on the highest-logit class per row
    predicted_classes = np.argmax(predictions, axis=1)
    acc = np.round(accuracy.compute(predictions=predicted_classes, references=labels)['accuracy'], 3)

    return {"Accuracy": acc, "AUC": auc}


In [9]:
from transformers import TrainingArguments

# Hyperparameters
lr = 2e-4        # learning rate
batch_size = 8   # used for both the train and eval per-device batch size
num_epochs = 10

# Logging, evaluation and checkpointing all happen once per epoch, and
# load_best_model_at_end is switched on. Checkpoints are written under
# "bert-phishing-classifier_teacher".
training_args = TrainingArguments(
    output_dir="bert-phishing-classifier_teacher",
    num_train_epochs=num_epochs,
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)


In [10]:
import transformers
import accelerate

# Record the library versions this notebook ran with, one per line
print(transformers.__version__, accelerate.__version__, sep="\n")


4.46.0
1.0.1


In [11]:
from transformers import Trainer

# Build the trainer. The tokenizer is passed as `processing_class`: with the
# transformers 4.46.0 recorded in this notebook, the older `tokenizer=` keyword
# is deprecated and triggers a FutureWarning when the Trainer is constructed.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],  # per-epoch evaluation runs on the "test" split
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Auc
1,0.494,0.377244,0.816,0.913
2,0.4058,0.367687,0.84,0.934
3,0.3687,0.324456,0.856,0.935
4,0.346,0.403224,0.838,0.944
5,0.3501,0.312271,0.871,0.945
6,0.3522,0.290385,0.862,0.95
7,0.3217,0.30694,0.862,0.947
8,0.3109,0.293975,0.864,0.949
9,0.3214,0.285175,0.873,0.95
10,0.3097,0.297383,0.871,0.951


TrainOutput(global_step=2630, training_loss=0.35805491458327143, metrics={'train_runtime': 1018.0523, 'train_samples_per_second': 20.628, 'train_steps_per_second': 2.583, 'total_flos': 706603239165360.0, 'train_loss': 0.35805491458327143, 'epoch': 10.0})

In [14]:
# Run the trained model over the "validation" split
predictions = trainer.predict(tokenized_data["validation"])

# Pull the logits and reference labels out of the prediction output
logits, labels = predictions.predictions, predictions.label_ids

# Score them with the same compute_metrics function the Trainer was given
metrics = compute_metrics((logits, labels))
print(metrics)

# Expected output format:
# {'Accuracy': 0.889, 'AUC': 0.946}


{'Accuracy': np.float64(0.88), 'AUC': np.float64(0.947)}


In [15]:
# Save the model and tokenizer side by side in the "bert-phishing-classifier"
# directory. This is a different path from the TrainingArguments output_dir
# ("bert-phishing-classifier_teacher"), and it is the path the reload cell reads.
model.save_pretrained("bert-phishing-classifier")
tokenizer.save_pretrained("bert-phishing-classifier")


('bert-phishing-classifier\\tokenizer_config.json',
 'bert-phishing-classifier\\special_tokens_map.json',
 'bert-phishing-classifier\\vocab.txt',
 'bert-phishing-classifier\\added_tokens.json',
 'bert-phishing-classifier\\tokenizer.json')

In [16]:
# Load the saved model

In [17]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np

# Load the model and tokenizer back from the "bert-phishing-classifier"
# directory. This rebinds the global `model` and `tokenizer` names, so the
# cells below use these reloaded objects.
model = AutoModelForSequenceClassification.from_pretrained("bert-phishing-classifier")
tokenizer = AutoTokenizer.from_pretrained("bert-phishing-classifier")


In [18]:
def classify_url(url):
    """Tokenize a URL, run the classifier on it and interpret the result.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        url: The URL text to classify.

    Returns:
        tuple: ``(label, confidence)``. ``label`` is ``"Phishing"`` when class
        1 is predicted and ``"Non-Phishing"`` otherwise; ``confidence`` is the
        softmax probability of the predicted class.
    """
    # Tokenize, then place the tensors on whichever device the model is on so
    # the forward pass also works when the model is not on the CPU.
    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)
    inputs = inputs.to(model.device)

    # Inference only: no gradients are needed
    with torch.no_grad():
        logits = model(**inputs).logits

    # Move the probabilities to the CPU before converting; .numpy() is only
    # available on CPU tensors.
    probabilities = torch.softmax(logits, dim=1).cpu().numpy()

    # Highest-probability class of the first (only) row, and its probability
    predicted_class = np.argmax(probabilities, axis=1)[0]
    confidence = probabilities[0][predicted_class]

    # Map the class index to a human-readable label
    label = "Phishing" if predicted_class == 1 else "Non-Phishing"

    return label, confidence


In [20]:
# Example URL to classify
new_url = "http://example-phishing-site.XLCCSGF"

# Get the classification and confidence
label, confidence = classify_url(new_url)
# confidence is a probability; it is multiplied by 100 and shown with two decimals
print(f"The URL '{new_url}' is classified as: {label} with a confidence of {confidence * 100:.2f}%")


The URL 'http://example-phishing-site.XLCCSGF' is classified as: Phishing with a confidence of 86.90%
