## **Example: `Spam.txt` file**

**Subject:** Congratulations! You've Won $100,000 Cash Prize

Dear George,

I am thrilled to inform you that you are the lucky winner of our recent contest and have been awarded a cash prize of $100,000! Your participation and enthusiasm are truly appreciated, and we couldn't be happier to share this exciting news with you.

Regards,  
idontSmile


## **Example: `NSpam.txt` file**

**Subject:** Invitation for Dinner

Dear Friend,

I hope this email finds you well. I wanted to extend a warm invitation to you for a dinner party at my home on **18/8/2023**, next Friday. It would be wonderful to have you join us.

Best regards,  
iSmile


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np
import re
from sklearn.utils import shuffle

# Toy training set (add more samples for better performance).
# Each pair is (email text, label) with 1 = spam and 0 = not spam; zip()
# regroups the pairs into one tuple of texts and one tuple of labels, and
# both are materialised as lists.
emails, labels = map(list, zip(
    ("Buy cheap watches! Free shipping!", 1),
    ("Meeting for lunch today?", 0),
    ("Claim your prize! You've won $1,000,000!", 1),
    ("Important meeting at 3 pm.", 0),
    ("You're invited to a dinner party at my place.", 0),
    ("Exclusive deal just for you!", 1),
    ("How about a catch-up call this weekend?", 0),
    ("Congratulations! You've won a prize!", 1),
))

# Text preprocessing function
def preprocess_text(text):
    """Return *text* lower-cased with digits and punctuation stripped.

    Runs of digits are deleted first, then every character that is neither
    a word character nor whitespace. Whitespace itself is left untouched,
    so removed tokens can leave consecutive or trailing spaces behind.
    """
    cleaned = text.lower()
    # Applied in order: digit runs, then anything that is not \w or \s.
    for pattern in (r'\d+', r'[^\w\s]'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned

# Clean every email with the shared preprocessing routine
emails = list(map(preprocess_text, emails))

# Vocabulary cap for the tokenizer and fixed length of every padded sequence
max_words = 1000
max_len = 50

# Build the word index from the cleaned emails; "<OOV>" is the
# out-of-vocabulary token
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(emails)

# Encode each email as integer ids, then pad/truncate at the end to max_len
sequences = tokenizer.texts_to_sequences(emails)
X_padded = pad_sequences(
    sequences,
    maxlen=max_len,
    padding="post",
    truncating="post",
)

# Hold out 20% of the samples for evaluation; the seed keeps the split fixed
X_train, X_test, y_train, y_test = train_test_split(
    X_padded,
    labels,
    test_size=0.2,
    random_state=42,
)

# The label splits are turned into NumPy arrays before training
y_train, y_test = np.array(y_train), np.array(y_test)

# Define the neural network model.
# The input sequences are zero-padded at the end ("post") up to max_len, so
# the embedding is created with mask_zero=True: index 0 is treated as padding
# and the resulting mask is propagated to both LSTM layers, which then skip
# the padded timesteps instead of processing them after the last real token.
# (Relies on 0 being the padding value, which is pad_sequences' default.)
model = tf.keras.Sequential([
    # An explicit Input fixes the sequence length; it replaces the Embedding
    # `input_length` argument, which is deprecated in Keras 3.
    tf.keras.Input(shape=(max_len,)),
    tf.keras.layers.Embedding(input_dim=max_words, output_dim=32, mask_zero=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(16, activation='relu'),
    # Single sigmoid unit: output is read as the probability of the spam class
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Compile the model for binary classification
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"])

# Train the model.
# X_train / X_test originate from pad_sequences and are already NumPy arrays,
# so they are passed through without another np.array() copy.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=1)

# Run the trained model on one email stored on disk
sample_file_path = "Spam.txt"
try:
    with open(sample_file_path, encoding="utf-8") as file:
        sample_email_text = file.read()

    # Apply the same cleaning, tokenisation and padding as the training data
    sample_email_text = preprocess_text(sample_email_text)
    sample_sequences = tokenizer.texts_to_sequences([sample_email_text])
    sample_email_padded = pad_sequences(
        sample_sequences,
        maxlen=max_len,
        padding="post",
        truncating="post",
    )

    # Compare the model output for the single sample with the decision threshold
    prediction = model.predict(sample_email_padded)
    threshold = 0.5
    print(
        f"Sample email ({sample_file_path}): SPAM"
        if prediction[0][0] > threshold
        else f"Sample email ({sample_file_path}): NOT SPAM"
    )
except FileNotFoundError:
    print(f"File not found: {sample_file_path}")
except Exception as e:
    # Best-effort boundary: any other failure is reported rather than raised
    print(f"An error occurred: {e}")


Epoch 1/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 5s 5s/step - accuracy: 0.5000 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 2/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 179ms/step - accuracy: 0.5000 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 3/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 92ms/step - accuracy: 0.5000 - loss: 0.6931 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 4/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 140ms/step - accuracy: 0.5000 - loss: 0.6931 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 5/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 158ms/step - accuracy: 0.5000 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 6/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 89ms/step - accuracy: 0.5000 - loss: 0.6931 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 7/10
1/1 ━━━━━━━━━━━━━━━━━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step
Sample email (Spam.txt): SPAM
