# Naive Bayes

### Full Code

In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.naive_bayes import MultinomialNB 

df = pd.read_csv('Datasets/mail_data.csv')

# Convert labels: 'spam' -> 1, 'ham' -> 0
df['Category'] = df['Category'].map({'ham': 0, 'spam': 1})

X = df['Message']
y = df['Category']

# Convert text to numerical vectors using CountVectorizer
vectorizer = CountVectorizer(stop_words='english') 
X_vectorized = vectorizer.fit_transform(X) 

X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=0) 

nb = MultinomialNB() 
nb.fit(X_train, y_train)

y_pred = nb.predict(X_test)

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Confusion Matrix:')
print(conf_matrix)
print('\nClassification Report:')
print(classification_report(y_test, y_pred))

# User input for testing
user_input = input("\nEnter your email text: ")
# Vectorize user input using the SAME vectorizer 
user_vector = vectorizer.transform([user_input]) 
# Predict 
prediction = nb.predict(user_vector)[0] 
print("\nPrediction Result:")
if prediction == 1: 
    print("The email is classified as: SPAM") 
else: 
    print("The email is classified as: NOT SPAM")


Accuracy: 98.12%
Confusion Matrix:
[[941  14]
 [  7 153]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       955
           1       0.92      0.96      0.94       160

    accuracy                           0.98      1115
   macro avg       0.95      0.97      0.96      1115
weighted avg       0.98      0.98      0.98      1115


Enter your email text: You have Won Rs 1,000

Prediction Result:
The email is classified as: SPAM
