In [3]:
!pip install scikit-learn



In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Create synthetic email dataset
# 1 indicates spam, 0 indicates not spam
emails = [
    {'text': 'Hey, wanna hang out tonight?', 'label': 0},
    {'text': 'Free money, click here now!', 'label': 1},
    {'text': 'What are you doing later?', 'label': 0},
    {'text': 'Congratulations, you won a prize!', 'label': 1},
    {'text': 'Your account will be suspended', 'label': 1},
    {'text': 'Your order has been shipped', 'label': 0},
    {'text': 'Win a gift card worth $1000', 'label': 1},
    {'text': 'Meeting has been rescheduled', 'label': 0},
    {'text': 'Reminder: Appointment at 3PM', 'label': 0},
    {'text': 'Get cash now, apply within', 'label': 1},
]

# Split into text and label lists
texts = [email['text'] for email in emails]
labels = [email['label'] for email in emails]

# Vectorize the text data
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.3, random_state=42)

# Train a Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")

# Testing with new emails
new_emails = [
    'Let\'s have dinner tonight',
    'You won a lottery of $10,000!',
    'Your prescription is ready'
]

new_emails_vectorized = vectorizer.transform(new_emails)
new_pred = clf.predict(new_emails_vectorized)

print("New email predictions:", new_pred)


Accuracy: 1.0
Confusion Matrix:
[[2 0]
 [0 1]]
New email predictions: [0 1 1]
