In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
import joblib

In [21]:
# Load the dataset
data = pd.read_csv("dataset_phishing.csv", encoding='unicode_escape')

# Selecting features and target
X = data.drop(columns=['url', 'status'])  # Drop non-numerical and target column
y = data['status']  # Target column

# Encoding the target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Standardizing the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, "scaler.pkl")

# Splitting the data into training and testing sets
random_seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=random_seed, stratify=y_encoded
)

# Converting to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Creating DataLoader for batch processing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


In [22]:
class PhishingDetectionModel(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(PhishingDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)

        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.bn2 = nn.BatchNorm1d(hidden_size // 2)

        self.fc3 = nn.Linear(hidden_size // 2, hidden_size // 4)  # Single output for binary classification
        self.bn3 = nn.BatchNorm1d(hidden_size // 4)

        self.fc4 = nn.Linear(hidden_size // 4, 1)  # Single output for binary classification

        self.dropout = nn.Dropout(0.3)
        self.activation = nn.ReLU()  # Activation for hidden layers

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.activation(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.activation(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = self.bn3(x)
        x = self.activation(x)
        x = self.dropout(x)
        x = self.fc4(x)  # Output logits (raw scores)
        return x

# Model parameters
input_size = X_train.shape[1]
hidden_size = 128
model = PhishingDetectionModel(input_size, hidden_size)

In [23]:
def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, save_path):
    best_f1 = 0.0  # Track the best F1 score

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_preds = []

        # Training phase
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)  # Raw logits

            # Calculate loss
            loss = criterion(outputs, y_batch.unsqueeze(1).float())  # Match output and label shapes
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Convert logits to probabilities
            probabilities = torch.sigmoid(outputs)

            # Apply threshold to get binary predictions
            predicted = (probabilities >= 0.5).int()

            # Collect predictions and true labels for F1 score and accuracy
            all_labels.extend(y_batch.numpy())
            all_preds.extend(predicted.numpy().flatten())
        
        # Calculate F1 score and accuracy for the epoch
        epoch_f1 = f1_score(all_labels, all_preds)
        epoch_accuracy = accuracy_score(all_labels, all_preds)
        
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, "
              f"F1 Score: {epoch_f1:.4f}, Accuracy: {epoch_accuracy:.4f}")

        # Evaluation phase
        model.eval()
        all_labels = []
        all_preds = []

        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)

                # Convert logits to probabilities
                probabilities = torch.sigmoid(outputs)

                # Apply threshold to get binary predictions
                predicted = (probabilities >= 0.5).int()

                # Collect predictions and true labels for F1 score and accuracy
                all_labels.extend(y_batch.numpy())
                all_preds.extend(predicted.numpy().flatten())
        
        # Calculate F1 score and accuracy
        f1 = f1_score(all_labels, all_preds)
        accuracy = accuracy_score(all_labels, all_preds)

        print(f"Test Accuracy: {accuracy * 100:.2f}%")
        print(f"Test F1 Score: {f1:.4f}")        

        # Save the model if it performs better
        if f1 > best_f1:
            best_f1 = f1
            best_epoch = epoch
            torch.save(model.state_dict(), save_path)
            print(f"New best model saved with F1 Score: {f1:.4f} at epoch {epoch + 1}")

    print(f'Best model saved at epoch {best_epoch + 1} with F1 score of {best_f1:.4f}')

# Parameters for training
num_epochs = 30
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
save_path = "best_model.pth"

In [24]:
# Train the model
train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, save_path)

Epoch 1/30, Loss: 0.5838, F1 Score: 0.6283, Accuracy: 0.6966
Test Accuracy: 78.57%
Test F1 Score: 0.7614
New best model saved with F1 Score: 0.7614 at epoch 1
Epoch 2/30, Loss: 0.4779, F1 Score: 0.7751, Accuracy: 0.7882
Test Accuracy: 80.45%
Test F1 Score: 0.7904
New best model saved with F1 Score: 0.7904 at epoch 2
Epoch 3/30, Loss: 0.4469, F1 Score: 0.7942, Accuracy: 0.8033
Test Accuracy: 81.80%
Test F1 Score: 0.8049
New best model saved with F1 Score: 0.8049 at epoch 3
Epoch 4/30, Loss: 0.4374, F1 Score: 0.7971, Accuracy: 0.8053
Test Accuracy: 82.85%
Test F1 Score: 0.8207
New best model saved with F1 Score: 0.8207 at epoch 4
Epoch 5/30, Loss: 0.4283, F1 Score: 0.8010, Accuracy: 0.8112
Test Accuracy: 81.98%
Test F1 Score: 0.8100
Epoch 6/30, Loss: 0.4268, F1 Score: 0.8003, Accuracy: 0.8081
Test Accuracy: 82.20%
Test F1 Score: 0.8127
Epoch 7/30, Loss: 0.4156, F1 Score: 0.8065, Accuracy: 0.8151
Test Accuracy: 83.07%
Test F1 Score: 0.8229
New best model saved with F1 Score: 0.8229 at epo

In [25]:
model.load_state_dict(torch.load("best_model.pth"))
model.eval()
all_labels = []
all_preds = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        probabilities = torch.sigmoid(outputs)  # Shape: (batch_size, 1)
        predicted = (probabilities >= 0.5).int()  # Shape: (batch_size, 1)
        all_labels.extend(y_batch.numpy())  # True labels
        all_preds.extend(predicted.numpy().flatten())  # Predicted binary labels

# Calculate F1 score and accuracy
f1 = f1_score(all_labels, all_preds)
accuracy = accuracy_score(all_labels, all_preds)

# Calculate confusion matrix
cm = confusion_matrix(all_labels, all_preds)

# Extract TP, TN, FP, FN from the confusion matrix
if cm.shape == (2, 2):  # Binary classification
    TN, FP, FN, TP = cm.ravel()
else:  # Multiclass: calculate separately for each class
    TP = cm.diagonal()  # True positives for each class
    FP = cm.sum(axis=0) - TP  # False positives for each class
    FN = cm.sum(axis=1) - TP  # False negatives for each class
    TN = cm.sum() - (FP + FN + TP)  # True negatives for each class

# Print results
print(f"Test Accuracy: {accuracy * 100:.2f}%")
print(f"Test F1 Score: {f1:.4f}")
print("\nClassification Metrics:")
print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")
print(f'Total Samples: {TP + FP + TN + FN}')



Test Accuracy: 84.91%
Test F1 Score: 0.8473

Classification Metrics:
True Positives (TP): 957
True Negatives (TN): 984
False Positives (FP): 159
False Negatives (FN): 186
Total Samples: 2286


  model.load_state_dict(torch.load("best_model.pth"))
