# Imports

In [33]:
import joblib
import re

# --- 1. Load the Saved Model and Vectorizer ---

In [34]:
# Load the trained classifier and its fitted TF-IDF vectorizer from disk.
# Both paths are relative to the current working directory.
try:
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load artifacts that come from a trusted source.
    model = joblib.load('model/spam_classifier_model.pkl')
    vectorizer = joblib.load('model/tfidf_vectorizer.pkl')
except FileNotFoundError as err:
    # Raise instead of calling exit(): exit() is an interactive-shell helper
    # that ends a script with a success status and, inside a notebook, takes
    # the kernel down. Raising stops execution here with a clear traceback and
    # keeps the original error (including the missing path) chained.
    raise FileNotFoundError(
        f"Error: Model or vectorizer files not found ({err.filename!r}). "
        "Please run the training script first to create these files."
    ) from err
else:
    print("Model and vectorizer loaded successfully.")

Model and vectorizer loaded successfully.


# --- 2. Define the Preprocessing and Prediction Functions ---

In [35]:
def preprocess_text(text):
    """
    Normalize raw text into a lowercase, letters-and-single-spaces string.

    Steps, in order: lowercase the input, delete every character that is not
    an ASCII letter or whitespace, collapse each whitespace run into a single
    space, and trim leading/trailing whitespace.

    NOTE(review): intended to mirror the cleaning applied at training time;
    the training code is not visible here, so confirm the two stay in sync.

    Args:
        text: The string to clean.

    Returns:
        str: The cleaned string (may be empty).
    """
    lowered = text.lower()
    letters_and_space = re.sub(r'[^a-zA-Z\s]', '', lowered)
    return re.sub(r'\s+', ' ', letters_and_space).strip()

def classify_email(subject, body):
    """
    Classify one email as spam or not spam and report the model's confidence.

    The subject and body are joined with a single space, cleaned with
    ``preprocess_text``, vectorized with the module-level ``vectorizer`` and
    scored with the module-level ``model``.

    Args:
        subject: Subject line of the email. ``None`` is treated as empty text.
        body: Body text of the email. ``None`` is treated as empty text.

    Returns:
        str: ``"Prediction: SPAM (Confidence: ...)"`` when the predicted label
        equals 1, otherwise ``"Prediction: NOT SPAM (Confidence: ...)"``. The
        confidence is formatted as a percentage with two decimal places.
    """
    # Emails frequently arrive without a subject or body; treat None as empty
    # text instead of failing on the string concatenation below.
    subject = "" if subject is None else subject
    body = "" if body is None else body

    full_text = subject + " " + body
    cleaned_text = preprocess_text(full_text)

    text_vector = vectorizer.transform([cleaned_text])

    # Exactly one document is scored, so take the first (only) entry of each
    # result rather than branching on the truthiness of a whole array.
    # Assumes a scikit-learn style estimator whose outputs are indexed per sample.
    predicted_label = model.predict(text_vector)[0]
    class_probabilities = model.predict_proba(text_vector)[0]

    # Column 1 is read as P(spam) and column 0 as P(not spam); this assumes
    # model.classes_ is ordered [0, 1] — TODO confirm against the training script.
    if predicted_label == 1:
        return f"Prediction: SPAM (Confidence: {class_probabilities[1]:.2%})"
    return f"Prediction: NOT SPAM (Confidence: {class_probabilities[0]:.2%})"

# --- 3. Query the Model with Example Emails ---

In [36]:
# Sample 1: wording typical of spam (urgency, prize, call to action)
spam_subject = "URGENT: You have won a prize!"
spam_body = "Congratulations, you have been selected to win a free vacation. Click here to claim now."

# Sample 2: an ordinary workplace message
ham_subject = "Meeting Reminder"
ham_body = "Hi team, just a reminder that our quarterly review meeting is scheduled for tomorrow at 10 AM. Please find the agenda attached."

print("\n--- Classifying New Emails ---")

# Each email is echoed back, followed on the next line by the model's verdict.
print(
    f"\nEmail 1:\nSubject: {spam_subject}\nBody: {spam_body}",
    classify_email(spam_subject, spam_body),
    sep="\n",
)

print(
    f"\nEmail 2:\nSubject: {ham_subject}\nBody: {ham_body}",
    classify_email(ham_subject, ham_body),
    sep="\n",
)


--- Classifying New Emails ---

Email 1:
Subject: URGENT: You have won a prize!
Body: Congratulations, you have been selected to win a free vacation. Click here to claim now.
Prediction: SPAM (Confidence: 90.45%)

Email 2:
Subject: Meeting Reminder
Body: Hi team, just a reminder that our quarterly review meeting is scheduled for tomorrow at 10 AM. Please find the agenda attached.
Prediction: NOT SPAM (Confidence: 75.63%)
