In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import re
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BitsAndBytesConfig


In [2]:
def preprocess_url(url):
    url = re.sub(r'https?://', '', url)
    parts = url.split('/', 1)
    domain = parts[0]
    path = parts[1] if len(parts) > 1 else ""
    text_rep = f"{domain} {path.replace('/', ' ')}"

    return text_rep

In [3]:
data = pd.read_csv("./test_data.csv")
data['type'] = data['type'].map(lambda x: 1 if x == 'benign' else 0)
data.head()

Unnamed: 0.1,Unnamed: 0,url,type
0,344275,http://85.217.170.105/XXX.sh,0
1,145258,technews.tmcnet.com/news/2011/07/28/5668630.htm,1
2,391135,crownpoint.com/artists/101/biography,1
3,397356,mrnormsgarage.com/vehicles/1971-cuda-convertib...,1
4,262630,http://www.colegiosanignacio.com.ar/index.php?...,0


In [26]:
model_path = "deberta_v3-1"
model_name = "microsoft/deberta-v3-base"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",  # NormalFloat4 quantization (best for LLMs)
        bnb_4bit_compute_dtype=torch.float16,  # Use FP16 for computation
        bnb_4bit_use_double_quant=True,  # Further reduces memory usage
        )

base_model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2,
            quantization_config=quantization_config,  # Use the correct config format
            device_map="auto",
        )
model = PeftModel.from_pretrained(base_model, model_path, is_trainable=False)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

PeftModel(
  (base_model): LoraModel(
    (model): DebertaV2ForSequenceClassification(
      (deberta): DebertaV2Model(
        (embeddings): DebertaV2Embeddings(
          (word_embeddings): Embedding(128100, 768, padding_idx=0)
          (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): DebertaV2Encoder(
          (layer): ModuleList(
            (0-11): 12 x DebertaV2Layer(
              (attention): DebertaV2Attention(
                (self): DisentangledSelfAttention(
                  (query_proj): lora.Linear4bit(
                    (base_layer): Linear4bit(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768, out_features=8, bias=False)
                    )
   

In [28]:
X_test = [preprocess_url(url) for url in data['url']]
y_test = [label for label in data['type']]

In [29]:
batch_size = 16
dataloader = DataLoader(list(zip(X_test, y_test)), batch_size=batch_size, shuffle=False)

In [30]:
model.eval()
all_predictions = []
all_labels = []

with torch.no_grad():
    for batch_texts, batch_labels in dataloader:
        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
        outputs = model(**inputs)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1).tolist()

        all_predictions.extend(predictions)
        all_labels.extend(batch_labels)


In [31]:
accuracy = accuracy_score(all_labels, all_predictions)
precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_predictions, average="binary")
report = classification_report(all_labels, all_predictions, target_names=["Negative", "Positive"])

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nClassification Report:\n", report)

Accuracy: 0.8934
Precision: 0.8639
Recall: 0.9347
F1 Score: 0.8979

Classification Report:
               precision    recall  f1-score   support

    Negative       0.93      0.85      0.89     53104
    Positive       0.86      0.93      0.90     53416

    accuracy                           0.89    106520
   macro avg       0.90      0.89      0.89    106520
weighted avg       0.90      0.89      0.89    106520



In [10]:
inputs = tokenizer([preprocess_url("https://webmail-aurba-inbox.netlify.app/")], return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
outputs = model(**inputs)

In [11]:
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[-0.0363,  0.0664]], device='cuda:0', dtype=torch.float16), hidden_states=None, attentions=None)

In [12]:
import torch
probs = torch.softmax(outputs.logits, dim=1)
print(probs)


tensor([[0.4744, 0.5259]], device='cuda:0', dtype=torch.float16)
