# **Model Tester**

This notebook tests the model against real-world job offer messages and everyday emails to assess its practical performance. The goal is to determine whether the model is deployment-ready or requires further fine-tuning to reliably classify job-related spam and legitimate messages.

In [1]:
from tokenizers import Tokenizer, models, trainers, pre_tokenizers
from pathlib import Path
from torch.nn import LSTM
import torch.nn as nn
import torch

In [2]:
# Tokenizer
# Loads the serialized tokenizer from a JSON file relative to the working directory.

tokenizer_path = Path("tokenizer.json")

# Fail fast so the error names the exact path that was looked up.
if not tokenizer_path.is_file():
    raise FileNotFoundError(f"Tokenizer file not found: {tokenizer_path.resolve()}")

print("Loading tokenizer...")
tokenizer = Tokenizer.from_file(str(tokenizer_path))
print("Tokenizer Loaded Successfully!")

Loading tokenizer...
Tokenizer Loaded Successfully!


In [3]:
# LSTM hyperparameters used to construct the classifier

# Embedding table size follows the loaded tokenizer's vocabulary
vocab_size = tokenizer.get_vocab_size()

# Layer widths
embedding_dim = 100  # size of each word-embedding vector
hidden_dim = 256     # hidden units per LSTM direction
output_dim = 1       # one output value for binary classification

# Recurrent stack configuration
num_layers = 3        # how many LSTM layers are stacked
bidirectional = True  # process the sequence in both directions
dropout = 0.3         # dropout rate

In [4]:
# LSTM Model Definition
class LSTMSpamClassifier(nn.Module):
    """Embedding -> (bi)directional stacked LSTM -> linear head.

    The forward pass returns the raw output of the linear layer; no sigmoid
    is applied inside the model.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector.
        hidden_dim: Hidden units per LSTM direction.
        output_dim: Size of the final linear layer's output.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        dropout: Dropout probability, applied to the embeddings and (when
            there is more than one layer) between LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers, bidirectional, dropout):
        super().__init__()

        # The attribute names embedding / lstm / fc are the state_dict key
        # prefixes, so they must not be renamed or saved weights stop loading.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=num_layers,
            bidirectional=bidirectional,
            # nn.LSTM only applies dropout between stacked layers; passing a
            # non-zero value with a single layer has no effect and triggers a
            # UserWarning, so it is disabled in that case.
            dropout=dropout if num_layers > 1 else 0.0,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Compute one output row per input sequence.

        Args:
            text: Integer token ids, [batch size, seq len].

        Returns:
            Tensor of shape [batch size, output dim] holding the linear
            layer's raw output.
        """
        # text = [batch size, seq len]

        embedded = self.dropout(self.embedding(text))
        # embedded = [batch size, seq len, emb dim]

        _, (hidden, _) = self.lstm(embedded)
        # hidden = [num layers * num directions, batch size, hidden dim]

        # Concatenate the last layer's final forward and backward hidden states
        if self.lstm.bidirectional:
            hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            hidden = hidden[-1, :, :]
        # hidden = [batch size, hidden dim * num directions]

        dense_output = self.fc(hidden)
        # dense_output = [batch size, output dim]

        return dense_output

In [5]:
# Choose the compute device: CUDA when available, CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the classifier from the notebook's hyperparameters and move it to that device
model = LSTMSpamClassifier(
    vocab_size,
    embedding_dim,
    hidden_dim,
    output_dim,
    num_layers,
    bidirectional,
    dropout,
).to(device)

# File holding the saved model weights
model_path = Path("spam-ham-detection-best-model.pt")

In [6]:
# Loading the model
print("Loading Model...")
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on a
# CUDA machine still loads on a CPU-only one.
# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state_dict needs and avoids executing arbitrary pickled code.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
print("Model Loaded Succesfully")

Loading Model...
Model Loaded Succesfully


In [7]:
# Set model to evaluation mode
model.eval()

# Creating the function for prediction
def predict_spam(text, model, tokenizer, max_len=128, device=device, threshold=0.7):
    """Score a single text and turn the score into a spam/ham label.

    Args:
        text: The message to classify.
        model: Module called with a [1, max_len] tensor of token ids; its
            single output value is passed through a sigmoid.
        tokenizer: Object providing ``encode(text).ids`` and ``token_to_id``.
        max_len: Sequence length fed to the model; longer inputs are
            truncated, shorter ones are right-padded with the "[PAD]" id.
        device: Device the input tensor is moved to before the forward pass.
        threshold: Probability strictly above which the text is labelled spam.

    Returns:
        (probability, label) where label is 1 (Spam) if probability > threshold
        and 0 (Ham) otherwise.

    Raises:
        ValueError: If padding is required but the tokenizer has no "[PAD]" token.
    """
    model.eval()
    with torch.no_grad():
        # Tokenize the input text and truncate to the model's sequence length
        token_ids = tokenizer.encode(text).ids[:max_len]

        # Pad the tokenized sequence up to max_len
        pad_count = max_len - len(token_ids)
        if pad_count > 0:
            pad_id = tokenizer.token_to_id("[PAD]")
            if pad_id is None:
                # Without this check the None would surface as an obscure tensor-construction error
                raise ValueError('Tokenizer vocabulary has no "[PAD]" token; cannot pad the input.')
            token_ids = token_ids + [pad_id] * pad_count

        # Shape [1, max_len]: a batch containing this one sequence
        input_tensor = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0).to(device)

        # Get model prediction
        prediction = model(input_tensor)

        # Apply sigmoid to get probability
        probability = torch.sigmoid(prediction).item()

    # Determine the label (0 for Ham, 1 for Spam)
    label = 1 if probability > threshold else 0

    return probability, label

In [10]:
# Sample messages fed to the classifier, grouped by the section comments below:
# 10 job-offer emails, 16 normal/personal emails, and 30 spam/scam emails.
# Only the raw text is stored; no expected label is attached to any entry, so
# the grouping is informational and is not used to score the predictions.
texts = [
    # --- Job Offer Emails ---
    "Dear Applicant, we are pleased to inform you that your profile has been shortlisted for the Software Engineer position. Please reply with your updated resume.",
    "Hello, we came across your portfolio and would love to schedule an interview for the Front-End Developer role.",
    "Congratulations! You've been selected for the Data Analyst position at ABC Corporation. Kindly confirm your availability for a quick call.",
    "We are currently hiring for a Junior Web Developer. Interested candidates may apply with their latest CV.",
    "Hi there, our HR team reviewed your application and wants to discuss the Backend Engineer role with you.",
    "We have an opening for a Remote UI/UX Designer. The position offers flexible working hours and great benefits.",
    "Your skills in Python and machine learning caught our attention. Please schedule an interview for the Research Engineer role.",
    "We would like to offer you a position as a Marketing Assistant at our Manila branch. Let us know if you’re interested.",
    "Our recruitment team is impressed with your background. Please complete the attached form to proceed with the hiring process.",
    "Hello, we are hiring interns for our IT department. Please send your resume before the end of this week.",

    # --- Normal / Personal Emails ---
    "Hey, are we still meeting later for coffee?",
    "Mom asked if you could pick up some groceries on your way home.",
    "Just checking in to see how your weekend went!",
    "Happy birthday! Wishing you all the best this year.",
    "Let’s catch up soon — it’s been too long!",
    "Can you please send me the files from yesterday’s meeting?",
    "I attached the slides you asked for. Let me know if you need edits.",
    "Thanks for helping me with my project yesterday. I really appreciate it!",
    "Don’t forget our dinner reservation at 7 PM tonight.",
    "Good luck with your exams! You’ve got this.",
    "The meeting has been rescheduled to 10 AM tomorrow.",
    "Please review the report and send your feedback by Friday.",
    "Are you available for a quick chat this afternoon?",
    "I just finished watching that movie you recommended — it was great!",
    "Can you send me the invoice for the last purchase?",
    "Hope you’re feeling better now. Get some rest!",

    # --- Spam / Scam Emails ---
    "Congratulations! You’ve won a $1,000 gift card. Click here to claim your prize now!",
    "Your account has been suspended. Verify your information immediately to restore access.",
    "Earn money fast from home with this secret method — no experience required!",
    "Claim your free iPhone 15 today! Only a few left!",
    "We have been trying to reach you about your car’s extended warranty.",
    "You have an unclaimed package waiting. Click this link to confirm delivery.",
    "Get a guaranteed loan with zero interest! Apply now.",
    "Limited time offer! Buy one, get one free on all luxury watches.",
    "Increase your credit score overnight using this simple trick!",
    "Exclusive deal for you — 90% off designer brands today only!",
    "This is your final notice: pay your tax fees immediately or face legal action.",
    "Earn $5,000 a week by completing online surveys. Sign up now!",
    "Your PayPal account was compromised. Login now to secure it.",
    "Congratulations! You’ve been randomly selected for a free vacation to the Bahamas.",
    "You won’t believe this investment opportunity — guaranteed returns!",
    "Dear user, your email has been chosen for a cash reward. Confirm details here.",
    "Get unlimited followers on Instagram instantly by using our new app.",
    "Act fast! Your computer is infected — download our antivirus tool now.",
    "We noticed suspicious login attempts from another country. Verify your account.",
    "Your shipment is on hold due to unpaid customs fees. Pay now to release it.",
    "Click here to see who visited your Facebook profile today.",
    "You’ve been approved for a $50,000 loan — no credit check required.",
    "Win an all-expenses-paid trip by filling out this short form.",
    "Make thousands daily trading cryptocurrency with zero risk!",
    "You’ve been selected for a government relief grant. Apply before midnight.",
    "Dear winner, claim your lottery prize immediately before it expires.",
    "Increase your website traffic by 500% instantly — no effort needed!",
    "This is not a scam! You’ve really won $10,000!",
    "Download our free software to boost your PC speed right now.",
    "Get 100% return on investment within a week. Don’t miss out!"
]


In [11]:
# Classify every sample text, echo the result, and bucket the text by predicted label
spam_texts, ham_texts = [], []

for text in texts:
    prob, label = predict_spam(text=text, model=model, tokenizer=tokenizer, max_len=128, device=device)
    is_spam = label == 1

    # Per-text report: the message, its spam probability, and the resulting label
    print(f"Text:\n")
    print(text)
    print(f"\nSpam Probability: {prob:.4f}")
    print(f"Predicted Label: {'Spam' if is_spam else 'Ham'}\n")

    # File the text under the list matching its predicted label
    (spam_texts if is_spam else ham_texts).append(text)

# Totals per predicted class
print(f"Number of Ham Texts: {len(ham_texts)}")
print(f"Number of Spam Texts: {len(spam_texts)}")

Text:

Dear Applicant, we are pleased to inform you that your profile has been shortlisted for the Software Engineer position. Please reply with your updated resume.

Spam Probability: 0.9032
Predicted Label: Spam

Text:

Hello, we came across your portfolio and would love to schedule an interview for the Front-End Developer role.

Spam Probability: 0.1905
Predicted Label: Ham

Text:

Congratulations! You've been selected for the Data Analyst position at ABC Corporation. Kindly confirm your availability for a quick call.

Spam Probability: 0.5944
Predicted Label: Ham

Text:

We are currently hiring for a Junior Web Developer. Interested candidates may apply with their latest CV.

Spam Probability: 0.0884
Predicted Label: Ham

Text:

Hi there, our HR team reviewed your application and wants to discuss the Backend Engineer role with you.

Spam Probability: 0.2044
Predicted Label: Ham

Text:

We have an opening for a Remote UI/UX Designer. The position offers flexible working hours and gr