## Import Libraries

In [2]:
import pandas as pd
import numpy as np
import pickle
import requests
import re
import string

## Download Model and Vectorizer from GitHub

In [3]:
# Raw-download URLs for the two pickled artifacts this notebook needs:
# the Naive Bayes classifier and the TF-IDF vectorizer.
# NOTE(review): both URLs point at the mutable `main` branch, so the downloaded
# files can change between runs — consider pinning to a commit SHA or tag.
url_nb_model = "https://raw.githubusercontent.com/Syeda-Mahjabin-Proma/Spam_Ham_Classifier/main/naive_bayes_model.pkl"
url_vectorizer = "https://raw.githubusercontent.com/Syeda-Mahjabin-Proma/Spam_Ham_Classifier/main/tfidf_vectorizer.pkl"

## Save the Downloaded Files and Load the Model and Vectorizer

In [4]:
def _download_file(url, destination, timeout=30):
    """Download ``url`` and write the response body to ``destination``.

    Args:
        url: Address of the file to fetch.
        destination: Local path the bytes are written to.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on 404/500 instead of saving an error page that would only
    # surface later as an obscure unpickling error.
    response.raise_for_status()
    # Open the file only after a successful download so a failed request
    # never leaves an empty or truncated .pkl behind.
    with open(destination, "wb") as f:
        f.write(response.content)


_download_file(url_nb_model, "naive_bayes_model.pkl")
_download_file(url_vectorizer, "tfidf_vectorizer.pkl")

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# These files come from the network, so only run this cell if the source
# repository is trusted.
with open("naive_bayes_model.pkl", "rb") as f:
    nb_model = pickle.load(f)
with open("tfidf_vectorizer.pkl", "rb") as f:
    tfidf_vectorizer = pickle.load(f)

print("Model and vectorizer loaded successfully!")

Model and vectorizer loaded successfully!


## Clean Email Body

In [5]:
def clean_email_text(text):
    """Normalise a raw email/SMS body into plain lowercase words.

    Steps, in order:
      1. Remove every ``From:`` / ``Subject:`` occurrence up to and including
         the end of its line. The patterns are case-sensitive, are not
         anchored to the start of a line, and only match when a newline
         follows, so a header on the last line without a trailing newline
         is left in place.
      2. Lowercase the text.
      3. Delete the characters in ``string.punctuation`` (ASCII punctuation
         only; typographic quotes such as ’ “ ” are kept).
      4. Delete runs of digits.
      5. Collapse whitespace runs to a single space and trim both ends.

    Args:
        text: A single message string, or a missing value (``None``/NaN).

    Returns:
        The cleaned string, or ``""`` when ``pd.isna(text)`` is true.
    """
    if pd.isna(text):
        return ""

    # Strip header lines before lowercasing, since the patterns are case-sensitive.
    for header_pattern in (r"From:.*\n", r"Subject:.*\n"):
        text = re.sub(header_pattern, "", text)

    punctuation_table = str.maketrans("", "", string.punctuation)
    without_punctuation = text.lower().translate(punctuation_table)
    without_digits = re.sub(r"\d+", "", without_punctuation)

    # Removing digits can leave double spaces behind, so squeeze whitespace last.
    return re.sub(r"\s+", " ", without_digits).strip()


## Testing with real-world data

In [9]:
# Sample message: a refund-themed request for bank routing and account details.
email_body = 'Notifications involving money owed to you are enticing, aren’t they? “Our records show you overpaid for (a product or service). Kindly supply your bank routing and account number to receive your refund.” Don’t fall for it.'

# Clean the text, turn it into a feature vector, then take the single prediction.
cleaned_email = clean_email_text(email_body)
email_vector = tfidf_vectorizer.transform([cleaned_email])
prediction = nb_model.predict(email_vector)[0]

# This cell treats label 0 as ham; any other label is reported as spam.
print(
    "This email looks safe. It's HAM."
    if prediction == 0
    else "Warning!!! This email is likely SPAM. Proceed with caution."
)




In [8]:
# Sample message: text describing an expected email from a known sender.
email_body = "You recognize the sender and are expecting their message, such as a confirmation for an online order or a response to a customer service request. "

# Clean the text, turn it into a feature vector, then take the single prediction.
cleaned_email = clean_email_text(email_body)
email_vector = tfidf_vectorizer.transform([cleaned_email])
prediction = nb_model.predict(email_vector)[0]

# This cell treats label 0 as ham; any other label is reported as spam.
print(
    "This email looks safe. It's HAM."
    if prediction == 0
    else "Warning!!! This email is likely SPAM. Proceed with caution."
)


This email looks safe. It's HAM.


## Try the Model Yourself

In [10]:
# Read a message from the user and run it through the same cleaning step
# used for the sample messages.
email_body = input("Enter your Email Body or SMS: ")
cleaned_email = clean_email_text(email_body)
print()

if not cleaned_email:
    # Blank, digit-only, or punctuation-only input cleans down to an empty
    # string. With no words left to score, a "safe" verdict would not reflect
    # the message content, so report that instead of classifying.
    print("No classifiable text was entered. Please enter a message that contains words.")
else:
    email_vector = tfidf_vectorizer.transform([cleaned_email])
    prediction = nb_model.predict(email_vector)[0]

    # This cell treats label 0 as ham; any other label is reported as spam.
    if prediction == 0:
        print("This email looks safe. It's HAM.")
    else:
        print("Warning!!! This email is likely SPAM. Proceed with caution.")


Enter your Email Body or SMS: abc

This email looks safe. It's HAM.
