In [1]:
# Make the repository root importable so the local `main` package can be found
import sys
import os
# Treat the parent of the current working directory as the repository root.
notebook_dir = os.path.abspath('')
repo_root = os.path.dirname(notebook_dir)
# Append only when absent so re-running the cell never adds duplicate entries.
if sys.path.count(repo_root) == 0:
    sys.path.append(repo_root)

In [2]:
# %load_ext autoreload
# %autoreload 2 
# %reload_ext autoreload

In [3]:
## Importing the required libraries
from main.model import DeepAntiPhish
from main.feature_engineering import process, imbalance_ratio
from main.runners import training
from main.helpers import get_feature_count, prettyPrintMetrics, compute_metrics

In [4]:
# Initiate the training and test loader
# process() is imported from main.feature_engineering; it is called with no
# arguments and its result is unpacked into the (train, test) loader pair.
# The recorded output of this cell shows 13 joblib tasks on 12 workers and
# about 12.6 minutes of wall time, so expect this step to be slow.
train_loader, test_loader = process()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of  13 | elapsed:   15.2s remaining:  1.4min
[Parallel(n_jobs=-1)]: Done   6 out of  13 | elapsed:   16.2s remaining:   19.0s
[Parallel(n_jobs=-1)]: Done  10 out of  13 | elapsed:   16.9s remaining:    5.0s
[Parallel(n_jobs=-1)]: Done  13 out of  13 | elapsed: 12.6min finished


In [5]:
import warnings
# Suppress every Python warning for the rest of the session (presumably to
# keep the training log below readable).
# NOTE(review): this is a blanket filter and also hides deprecation and
# numerical warnings — confirm that is intended.
warnings.filterwarnings('ignore')
# IPython magic (not plain Python): sets Application.log_level to 'ERROR'.
%config Application.log_level = 'ERROR'

In [None]:
# Initialize model and commence training
# The class-imbalance ratio and the input width are both derived from the
# training loader; the ratio is forwarded to training() as pos_neg_ratio.
ratio = imbalance_ratio(train_loader)
feature_count = get_feature_count(train_loader)
model = DeepAntiPhish(input_dims=feature_count)

# Schedule and optimiser settings: 8 cycles of 7 epochs each.
training_options = {
    "pos_neg_ratio": ratio,
    "cycles": 8,
    "epochs_per_cycle": 7,
    "lr": 2e-3,
    "weight_decay": 1e-2,
}
modelPerformance = training(model, train_loader, test_loader, **training_options)

---------------- Cycle: 01 / 08 -----------------------------
Computed accuracy at epoch: 01/7 is: 98.19%
 	Cycle Model updated to new value:      Loss: 0.3189    | Accuracy: 98.19%    | Precision: 0.9976    | Recall: 0.9716    | F1-score: 0.9844
    
Computed accuracy at epoch: 02/7 is: 98.92%
 	Cycle Model updated to new value:      Loss: 0.2202    | Accuracy: 98.92%    | Precision: 0.9981    | Recall: 0.9836    | F1-score: 0.9908
    
Computed accuracy at epoch: 03/7 is: 97.90%
Computed accuracy at epoch: 04/7 is: 97.82%
Computed accuracy at epoch: 05/7 is: 95.82%
Computed accuracy at epoch: 06/7 is: 97.40%
Computed accuracy at epoch: 07/7 is: 96.64%
 	Global Model updated to new value:      Loss: 0.2202    | Accuracy: 98.92%    | Precision: 0.9981    | Recall: 0.9836    | F1-score: 0.9908
    

 Saved model at path: ../models/deep_antiphish_cycle00.pth with val_loss = 0.9892)
---------------- Cycle: 02 / 08 -----------------------------
Computed accuracy at epoch: 01/7 is: 97.19%
 

In [None]:
# Evaluation of the best model
import torch

# Rebuild the architecture with the input width derived from the training
# loader, then restore the weights stored in the best-model checkpoint
# (presumably written by training() — confirm against main.runners).
best_model = DeepAntiPhish(input_dims=get_feature_count(train_loader))
# map_location="cpu": the model and the batches in this notebook are never
# moved to another device, and without it a checkpoint saved from a CUDA run
# cannot be deserialised on a CPU-only machine.
best_state = torch.load("../models/deep_antiphish_best_model.pth", map_location="cpu")
best_model.load_state_dict(best_state)
# Evaluation mode before computing metrics.
best_model.eval()

# compute_metrics returns the metrics object plus the true labels and the
# predictions; y_true / y_preds are reused by the plotting cells below.
# Note that modelPerformance is rebound here (it previously held the result
# of training()).
modelPerformance, y_true, y_preds = compute_metrics(best_model, test_loader)
prettyPrintMetrics(modelPerformance, "Performance: ", print_confusion_matrix=True, print_classification_report=True)

In [None]:
## ROC Curve
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt

# Get probability scores: the model output is flattened and passed through a
# sigmoid, one probability per test sample, collected in loader order.
with torch.no_grad():
    y_probs = [
        prob
        for X_batch, _ in test_loader
        for prob in torch.sigmoid(best_model(X_batch).view(-1)).tolist()
    ]

# y_true comes from the evaluation cell above.
fpr, tpr, _ = roc_curve(y_true, y_probs)
auc_score = roc_auc_score(y_true, y_probs)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, label=f'AUC = {auc_score:.4f}')
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve")
ax.legend(loc="lower right")
ax.grid(True)
plt.show()

In [None]:
##Heatmap/Confusion Matrix
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Rows/columns are ordered as labels [0, 1], so the tick names below map
# 0 -> 'Phish' and 1 -> 'Safe'.
# NOTE(review): the label encoding is not visible in this notebook — confirm
# that class 0 really is the phishing class.
class_names = ['Phish', 'Safe']
conf_matrix = confusion_matrix(y_true, y_preds, labels=[0,1])

fig, ax = plt.subplots(figsize=(6,4))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.metrics import precision_recall_curve

# Precision-recall curve computed from the true labels (y_true) and the
# predicted probabilities (y_probs) produced by the cells above.
precision, recall, _ = precision_recall_curve(y_true, y_probs)
plt.figure(figsize=(6, 4))
plt.plot(recall, precision, label="Precision-Recall Curve")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision vs Recall")
# The line carries a label, so draw the legend (as the ROC cell does);
# without this call the label is never shown.
plt.legend(loc="lower left")
plt.grid(True)
plt.show()

# Histogram of the predicted probabilities in 50 bins.
plt.figure(figsize=(6, 4))
plt.hist(y_probs, bins=50, alpha=0.7, color='purple')
plt.title("Prediction Confidence (Probabilities)")
plt.xlabel("Probability")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()

In [None]:
#Model Performance Batch-wise

import torch
import torch.nn as nn

# Reload the best checkpoint into the existing best_model instance.
# map_location="cpu" keeps the load working on CPU-only machines even when
# the checkpoint was saved from a CUDA run; nothing in this notebook moves
# the model or the batches to another device.
checkpoint_path = "../models/deep_antiphish_best_model.pth"
state_dict      = torch.load(checkpoint_path, map_location="cpu")
best_model.load_state_dict(state_dict)

best_model.eval()
# BCEWithLogitsLoss takes raw logits, so no sigmoid is applied to the model
# output here.
criterion = nn.BCEWithLogitsLoss()
batch_losses = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = best_model(X_batch).view(-1)
        # The loss needs targets with the same shape and floating dtype as
        # the flattened logits; normalise them so integer or (batch, 1)
        # labels do not raise. This is a no-op for 1-D float labels.
        targets = y_batch.reshape(-1).to(outputs.dtype)
        loss    = criterion(outputs, targets)
        batch_losses.append(loss.item())


# Plotting batch-wise loss (one point per test batch, in loader order)
plt.figure(figsize=(8, 5))
plt.plot(batch_losses, marker='o')
plt.title('Loss per Batch on Best Model')
plt.xlabel('Batch Index')
plt.ylabel('Loss')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
from main.feature_engineering import df_from_csv
from main.constants import TEST_DATA
# Read the data referenced by TEST_DATA into a DataFrame via the project's
# df_from_csv helper. The following cell attaches model predictions to this
# frame by row position, so it is assumed to hold the same rows, in the same
# order, as test_loader — TODO confirm against process().
test_df = df_from_csv(TEST_DATA)

In [None]:
# Re-run inference over the test loader, collecting per-sample true labels,
# hard predictions (probability >= 0.5) and probabilities, in loader order.
y_true, y_pred, y_prob = [], [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = best_model(X_batch).view(-1)
        prob    = torch.sigmoid(outputs)
        preds   = (prob >= 0.5).int()

        # Flatten the labels so a (batch, 1) target does not produce nested
        # lists; this is a no-op for 1-D labels.
        y_true.extend(y_batch.reshape(-1).int().tolist())
        y_pred.extend(preds.tolist())
        y_prob.extend(prob.tolist())

# Predictions are attached to test_df purely by position, so fail early with
# a clear message when the loader and the frame disagree on the sample count
# (e.g. rows dropped during preprocessing).
if len(y_prob) != len(test_df):
    raise ValueError(
        f"test_loader produced {len(y_prob)} predictions but test_df has "
        f"{len(test_df)} rows; predictions cannot be aligned by position."
    )

# Assemble the DataFrame (row order == test_df because shuffle=False)
results_df = test_df.reset_index(drop=True).loc[:, [
    "body_count_of_words", "body_length", "url_count",
    "attachment_count", "has_url", "has_attachment"
]].copy()

results_df["pred_prob"]   = y_prob
results_df["predicted"]   = y_pred
results_df["true_label"]  = y_true


##False Positive vs False Negative

# Define False Positives and False Negatives
# False positive: true label 0 predicted as 1; false negative: true label 1
# predicted as 0.
false_positives = results_df[(results_df['true_label'] == 0) & (results_df['predicted'] == 1)]
false_negatives = results_df[(results_df['true_label'] == 1) & (results_df['predicted'] == 0)]
# Features to compare
features_to_plot = [
    "body_count_of_words", "body_length", "url_count",
    "attachment_count", "has_url", "has_attachment", "pred_prob"
]

# A feature is only plotted when both groups have more than one distinct
# value for it (this also covers an empty group).
plottable_features = []
for feature in features_to_plot:
    # Skip features with zero variance in either group
    if false_positives[feature].nunique() <= 1 or false_negatives[feature].nunique() <= 1:
        print(f" Skipping '{feature}' due to zero variance in one group.")
        continue
    plottable_features.append(feature)

if not plottable_features:
    # Nothing to draw: avoid showing an empty titled figure.
    print(" No feature has enough variation in both groups to plot.")
else:
    # Start plotting
    plt.figure(figsize=(18, 12))
    for plot_index, feature in enumerate(plottable_features, start=1):
        plt.subplot(3, 3, plot_index)
        sns.kdeplot(false_positives[feature], label='False Positives', color='red', fill=True)
        sns.kdeplot(false_negatives[feature], label='False Negatives', color='blue', fill=True)
        plt.title(f"Distribution of {feature}")
        plt.xlabel(feature)
        plt.ylabel("Density")
        plt.legend()

    plt.tight_layout()
    # Placed slightly above the axes (y=1.02) after tight_layout has run.
    plt.suptitle("False Positives vs False Negatives — Feature Distributions", fontsize=16, y=1.02)
    plt.show()