### Comprehensive Performance Metrics for BiLSTM Model

In [1]:
import numpy as np
import pandas as pd
from utils.constants import DATA_PATH, GLOVE_PATH

# Echo the configured data root so the reader can see which location this run reads from.
DATA_PATH

'/home/michael/PycharmProjects/spam-detection-data'

In [2]:
# Read the preprocessed train/test splits from the data root.
# Caution: unpickling can execute arbitrary code, so only load files you trust.
train_df, test_df = (
    pd.read_pickle(DATA_PATH + relative_path)
    for relative_path in ('/data/processed/train.pkl', '/data/processed/test.pkl')
)

In [3]:
import torch

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [4]:
from utils.functions import set_seed, build_vocab

# Hyperparameters consumed by later cells: embedding_dim is passed to the GloVe
# loader and the model constructor, max_len to the text encoder.
embedding_dim, max_len = 300, 200

# Fix the random seed, then derive the token <-> index mappings from the training text.
# NOTE(review): the vocabulary presumably has to match the one used when the
# checkpoint was trained — confirm build_vocab is called the same way as in training.
set_seed(42)
word2idx, idx2word = build_vocab(train_df['text'])

In [5]:
from preprocess.data_loader import load_glove_embeddings

# Load the GloVe vectors for this vocabulary; the result is handed to the model
# constructor as its pretrained embeddings.
pretrained_embeddings = load_glove_embeddings(
    GLOVE_PATH,
    word2idx,
    embedding_dim,
)

In [7]:
# Rebuild the BiLSTM architecture and restore the trained weights from disk.
from models.bilstm import BiLSTMSpam

model_path = DATA_PATH + '/trained-models/best_bilstm_model.pt'

# NOTE(review): the recorded output of this cell is a RuntimeError raised while
# loading the state dict — the checkpoint's embedding.weight is [25373, 300] while
# the model built here has [25245, 300] (presumably len(word2idx) rows). The
# vocabulary in this notebook apparently differs from the one used in training;
# reuse the training-time vocabulary rather than resizing the embedding, since
# resizing would leave token indices misaligned with the learned rows.
model = BiLSTMSpam(
    vocab_size=len(word2idx),
    embedding_dim=embedding_dim,
    pretrained_embeddings=pretrained_embeddings,
)
model.load(model_path)

# Move the weights to the selected device and switch to evaluation mode.
model = model.to(device)
model.eval()

RuntimeError: Error(s) in loading state_dict for BiLSTMSpam:
	size mismatch for embedding.weight: copying a param with shape torch.Size([25373, 300]) from checkpoint, the shape in current model is torch.Size([25245, 300]).

In [None]:
from utils.functions import encode

# Encode every test message with the vocabulary and max_len, then place the
# resulting tensor on the inference device.
X_test_tensor = torch.tensor(
    [encode(text, word2idx, max_len) for text in test_df['text']]
).to(device)

# Labels are stored as float32 and moved to the same device as the inputs.
y_test_tensor = torch.tensor(
    test_df['label'].values,
    dtype=torch.float32,
).to(device)

In [None]:
# Score the whole test set in a single forward pass with gradient tracking disabled.
with torch.no_grad():
    y_pred_probs = model(X_test_tensor)
    # Threshold at 0.5 to get hard 0/1 predictions.
    # NOTE(review): assumes the model outputs probabilities in [0, 1] — confirm.
    y_pred = y_pred_probs.gt(0.5).float()

In [None]:
from metrics.metrics import compute_metrics
import matplotlib.pyplot as plt
import seaborn as sns

# Bring everything back to host memory as NumPy arrays before computing metrics.
# compute_metrics is expected to return a DataFrame, displayed below.
metrics_df = compute_metrics(
    y_test_tensor.cpu().numpy(),  # ground-truth labels
    y_pred.cpu().numpy(),         # thresholded predictions
    y_pred_probs.cpu().numpy(),   # raw model outputs
)
metrics_df

In [None]:
# Read the confusion-matrix counts attached to the metrics DataFrame's attrs.
confusion_matrix = metrics_df.attrs['confusion_matrix']

# Lay the counts out as a 2x2 grid: rows are actual labels, columns are predicted labels.
cm_values = np.array([
    [confusion_matrix[key] for key in ('tn', 'fp')],
    [confusion_matrix[key] for key in ('fn', 'tp')],
])

# Draw the annotated heatmap on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm_values,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['Predicted Negative (Ham)', 'Predicted Positive (Spam)'],
    yticklabels=['Actual Negative (Ham)', 'Actual Positive (Spam)'],
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_ylabel('Actual Label')
ax.set_xlabel('Predicted Label')
fig.tight_layout()
plt.show()

# Plain-text breakdown of the four cells, one per line.
print(
    "\nConfusion Matrix Details:",
    f"True Positives (TP): {confusion_matrix['tp']} - Correctly classified spam messages",
    f"True Negatives (TN): {confusion_matrix['tn']} - Correctly classified ham messages",
    f"False Positives (FP): {confusion_matrix['fp']} - Ham messages incorrectly classified as spam",
    f"False Negatives (FN): {confusion_matrix['fn']} - Spam messages incorrectly classified as ham",
    sep="\n",
)