In [2]:
import pandas as pd
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
# Fetch every NLTK resource this notebook relies on (tokenizer models,
# the stop-word lists and the WordNet corpus). Already-present packages are
# reported as up to date by nltk.download.
for resource in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(resource)
# Toy corpus: each entry pairs an email body with its 'spam' / 'ham' label,
# so a text and its label cannot drift out of alignment when rows are edited.
_labelled_emails = [
    ("Congratulations! You've won a $1000 Walmart gift card. Click here to claim now.", 'spam'),
    ("Dear user, your account statement is ready for download.", 'ham'),
    ("Limited time offer! Get cheap meds now!!!", 'spam'),
    ("Meeting rescheduled to 3 PM tomorrow. Please confirm your attendance.", 'ham'),
    ("Your OTP for login is 458930. Do not share with anyone.", 'ham'),
    ("Win a brand new iPhone! Visit our website to participate.", 'spam'),
    ("Get 70% off on all electronic gadgets. Hurry up before stock runs out!", 'spam'),
    ("Reminder: Your electricity bill is due on 10th November.", 'ham'),
    ("This is not spam! You have been selected for a free cruise trip!", 'spam'),
    ("Please review the attached project document and provide feedback.", 'ham'),
    ("Exclusive deal: Buy 1 Get 1 Free on all perfumes!", 'spam'),
    ("Salary credited successfully to your bank account.", 'ham'),
    ("Earn money working from home. No experience required!", 'spam'),
    ("Your Amazon order has been shipped successfully.", 'ham'),
    ("Claim your lottery prize by clicking the link below.", 'spam'),
    ("Weekly meeting postponed due to maintenance.", 'ham'),
    ("Get rich quick! Join our investment plan and double your income.", 'spam'),
    ("Important security update for your account. Please login to verify.", 'ham'),
    ("Your train ticket has been booked successfully.", 'ham'),
    ("Win a free vacation to Hawaii! Click here to enter now!", 'spam'),
]

# Unzip the pairs back into the column-oriented mapping the rest of the
# notebook expects: two parallel lists under the keys 'text' and 'label'.
_texts, _labels = zip(*_labelled_emails)
data = {
    'text': list(_texts),
    'label': list(_labels),
}

# One row per email, columns 'text' and 'label'.
df = pd.DataFrame(data)
# Shared NLP resources, created once and reused by every preprocess_text call.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
# Translation table that deletes every character in string.punctuation.
# Built once here rather than on each call, since it never changes.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def preprocess_text(text):
    """Normalize a raw email string for vectorization.

    Steps, in order:
      1. lowercase the text;
      2. delete every ``string.punctuation`` character;
      3. split into tokens with ``nltk.word_tokenize``;
      4. drop tokens found in the English stop-word set;
      5. lemmatize each remaining token with ``WordNetLemmatizer``.

    Punctuation is removed *before* tokenizing, so a contraction such as
    "You've" collapses to the single token "youve" and is not recognized
    as a stop word.

    Args:
        text: The email body as a ``str``.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    normalized = text.lower().translate(_PUNCT_TABLE)
    tokens = nltk.word_tokenize(normalized)
    clean_tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(clean_tokens)
# Clean every email once; the result is stored on the frame so the raw and
# preprocessed text can be compared side by side.
df['clean_text'] = df['text'].apply(preprocess_text)
print("✅ Sample Preprocessed Emails:")
print(df[['text', 'clean_text', 'label']].head())

y = df['label']

# Split the cleaned *text* before vectorizing. Fitting TF-IDF on the whole
# corpus and splitting afterwards would let the held-out emails influence the
# vocabulary and IDF weights (data leakage), making the reported scores
# optimistic. test_size and random_state are unchanged.
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_text'], y, test_size=0.3, random_state=42
)

# Learn the vocabulary / IDF weights from the training emails only, then
# project the held-out emails into that same feature space.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Kept so later code that refers to X still works: the full corpus expressed
# in the train-fitted feature space.
X = vectorizer.transform(df['clean_text'])

model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate on the held-out emails.
y_pred = model.predict(X_test)
print("\n📊 Model Performance:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
def predict_email(text):
    """Classify a single raw email with the trained model.

    The text goes through the same ``preprocess_text`` cleaning as the
    training data, is transformed by the already-fitted module-level
    ``vectorizer``, and is then scored by the module-level ``model``.

    Args:
        text: Raw email body.

    Returns:
        The first (and only) element of the model's prediction for this email.
    """
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([preprocess_text(text)])
    predictions = model.predict(features)
    return predictions[0]
# Demo: run one unseen email through the full preprocess -> vectorize ->
# predict path and show the label it receives.
sample_email = "Congratulations! You have been selected for a free vacation to Paris!"
print("\n✉️ New Email Prediction:")
print(sample_email)
print("→ Classified as:", predict_email(sample_email))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


✅ Sample Preprocessed Emails:
                                                text  \
0  Congratulations! You've won a $1000 Walmart gi...   
1  Dear user, your account statement is ready for...   
2          Limited time offer! Get cheap meds now!!!   
3  Meeting rescheduled to 3 PM tomorrow. Please c...   
4  Your OTP for login is 458930. Do not share wit...   

                                          clean_text label  
0  congratulation youve 1000 walmart gift card cl...  spam  
1         dear user account statement ready download   ham  
2                   limited time offer get cheap med  spam  
3  meeting rescheduled 3 pm tomorrow please confi...   ham  
4                      otp login 458930 share anyone   ham  

📊 Model Performance:
              precision    recall  f1-score   support

         ham       1.00      1.00      1.00         3
        spam       1.00      1.00      1.00         3

    accuracy                           1.00         6
   macro avg       1.0