In [8]:
import re
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [9]:
# Load the trained Keras model from its HDF5 file; predict_spam calls
# model.predict on it.
model = load_model('spam_mail_model.h5')

# Load the stopword collection that clean_text filters tokens against.
# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# only open pickle files that come from a trusted source.
with open('stopwords.pkl','rb') as file:
    all_stopwords = pickle.load(file)

# Load the mapping of preprocessing artefacts (same pickle caveat as above).
# Besides the keys read below, lemmatize_text looks up an 'nlp' entry in this
# mapping at call time.
with open('preprocessing.pkl','rb') as file:
    preprocessing = pickle.load(file)

# Embedding model: text_to_vectors reads its .wv lookup and .vector_size.
word2vec_model = preprocessing['word2vec_model']
# Number of token vectors every input is padded or truncated to.
max_len = preprocessing['max_length']
# Not referenced by any of the visible cells; presumably maps class indices
# back to label names -- TODO confirm before relying on it.
label_encoder = preprocessing['label_encoder']




In [10]:
def clean_text(text):
    """Normalise raw message text and strip stopwords.

    The text is lower-cased, every character other than ``a-z``, whitespace,
    ``!`` and ``?`` is deleted (without inserting a space, so ``don't``
    becomes ``dont``), whitespace runs are collapsed, and tokens found in the
    module-level ``all_stopwords`` collection are dropped.

    Args:
        text: The raw message as a ``str``.

    Returns:
        The remaining tokens joined by single spaces.
    """
    lowered = text.lower()
    # Keep only letters, whitespace and the two punctuation marks ! and ?.
    stripped = re.sub(r'[^a-z\s!?]', '', lowered)
    collapsed = re.sub(r'\s+', ' ', stripped)

    kept = []
    for token in collapsed.split():
        if token not in all_stopwords:
            kept.append(token)
    return ' '.join(kept)


In [11]:
def lemmatize_text(text):
    """Replace every token of ``text`` with its lemma.

    The NLP pipeline is read from the ``'nlp'`` entry of the module-level
    ``preprocessing`` mapping on each call (the original note says this is the
    same object that was used during training).

    Args:
        text: The (already cleaned) text to lemmatise.

    Returns:
        The ``lemma_`` of each token produced by the pipeline, joined by
        single spaces.

    Raises:
        ValueError: If ``preprocessing`` has no ``'nlp'`` entry or it is None.
    """
    pipeline = preprocessing.get('nlp')
    if pipeline is not None:
        lemmas = [token.lemma_ for token in pipeline(text)]
        return ' '.join(lemmas)
    raise ValueError("NLP object not found in preprocessing")


In [12]:
def text_to_vectors(text):
    """Turn whitespace-separated text into a fixed-size batch of word vectors.

    Tokens missing from ``word2vec_model.wv`` are skipped. The remaining
    vectors are truncated to the first ``max_len`` entries, or right-padded
    with zero vectors when there are fewer.

    Unlike the previous list-based padding, the result has the same dtype and
    rank for every input: the old code returned an integer array when no token
    was in the vocabulary and a different float dtype depending on whether
    padding had been added.

    Args:
        text: Pre-processed text whose tokens are separated by whitespace.

    Returns:
        A ``float32`` ``numpy.ndarray`` of shape
        ``(1, max_len, word2vec_model.vector_size)``: a batch holding one
        sequence.
        NOTE(review): float32 is chosen to match Keras' default float type;
        confirm the model input does not expect something else.
    """
    vocab = word2vec_model.wv
    # Pre-allocating zeros makes the padding implicit and fixes dtype/shape.
    batch = np.zeros((1, max_len, word2vec_model.vector_size), dtype=np.float32)

    known = [vocab[token] for token in text.split() if token in vocab][:max_len]
    if known:
        # Copy the vectors into the leading rows; trailing rows stay zero.
        batch[0, :len(known)] = known
    return batch

def predict_spam(text):
    """Classify a raw message with the loaded model.

    The message goes through ``clean_text``, ``lemmatize_text`` and
    ``text_to_vectors`` in that order, and the resulting batch is passed to
    ``model.predict``. The first value of the first prediction row is compared
    against a 0.5 threshold.

    Args:
        text: The raw message text.

    Returns:
        ``1`` when the score is strictly greater than 0.5, otherwise ``0``.
        The calling cell prints "Spam" for 1 and "Ham" for 0.
    """
    features = text_to_vectors(lemmatize_text(clean_text(text)))
    score = model.predict(features)[0][0]
    if score > 0.5:
        return 1
    return 0


In [13]:
# Sample message (an interview invitation) used to exercise predict_spam in
# the next cell. It deliberately contains emoji, digits and punctuation, all
# of which clean_text removes before classification.
text = """Subject: Invitation for Interview – Software Engineer Position
Body:
Dear Applicant,

Thank you for applying for the Software Engineer position at our company. We are pleased to invite you for an interview.

📅 Date: March 12, 2025
⏰ Time: 10:00 AM
📍 Location: ABC Corp, 123 Street, NY

Please reply to confirm your availability. Looking forward to meeting you!

Best Regards,
HR Team
"""


In [14]:
# Classify the sample message and print the human-readable verdict
# (predict_spam returns 1 for spam, 0 otherwise).
print("Spam" if predict_spam(text) else "Ham")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 641ms/step
Ham
