In [1]:
# Import necessary libraries
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split 
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.naive_bayes import MultinomialNB 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix 

# Load the dataset
data = pd.read_csv('spam email.csv') 
print("\nFirst 5 rows of the dataset:\n", data.head()) 

# Drop missing values and convert text to lowercase
data.dropna(inplace=True) 
data['text'] = data['text'].str.lower() 

# Separate features and target labels
X = data['text'] 
y = data['label'] 

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 

# Vectorize text data using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000) 
X_train_tfidf = vectorizer.fit_transform(X_train) 
X_test_tfidf = vectorizer.transform(X_test) 

# Initialize and train the Naive Bayes model
model = MultinomialNB() 
model.fit(X_train_tfidf, y_train) 

# Make predictions on the test set
y_pred = model.predict(X_test_tfidf) 

# Evaluate the model
print("\nModel Evaluation:") 
print("Accuracy:", accuracy_score(y_test, y_pred)) 
print("\nClassification Report:\n", classification_report(y_test, y_pred)) 
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred)) 

# Test the model with a new email example
new_email = ["Congratulations! You've won a $1000 gift card. Click here to claim."] 
new_email_tfidf = vectorizer.transform(new_email) 
prediction = model.predict(new_email_tfidf) 
print("\nPrediction for new email:", "Scam" if prediction[0] == 1 else "Not Scam")



First 5 rows of the dataset:
                                                 text  label
0  Congratulations! You've won a prize. Click her...      1
1           Your account statement is now available.      0
2  Update your payment details to avoid service i...      0
3       Claim your free vacation now! Limited offer!      1
4               Meeting rescheduled to next Tuesday.      0

Model Evaluation:
Accuracy: 0.0

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       2.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0


Confusion Matrix:
 [[0 2]
 [0 0]]

Prediction for new email: Scam


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
