# 📌 Spam Email Detection using Scikit-learn

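This notebook demonstrates how to build a simple predictive model that classifies emails as **spam** or **not spam** (ham) using Scikit-learn: each message is converted to bag-of-words counts with `CountVectorizer`, and a Multinomial Naive Bayes classifier is trained on those counts.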

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Load dataset
# Keep only the label column ('v1') and the text column ('v2') and give them
# descriptive names. rename() returns a new frame, so the column assignment
# below never writes into a slice of the frame returned by read_csv.
data = (
    pd.read_csv("spam.csv", encoding="latin-1")[['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'message'})
)

# Encode the target as integers: ham -> 0, spam -> 1.
label_codes = data['label'].map({'ham': 0, 'spam': 1})

# map() turns every value that is not a key of the dictionary into NaN. Stop
# here with a clear message instead of handing NaN targets to later cells.
unmapped = data.loc[label_codes.isna(), 'label']
if not unmapped.empty:
    raise ValueError(
        f"Unexpected label values in spam.csv: {unmapped.unique().tolist()!r}"
    )

data['label'] = label_codes
data.head()

In [None]:
# Split data
# X holds the raw message text, y the integer class label.
X = data['message']
y = data['label']

# Hold out 20% of the rows for evaluation. stratify=y keeps the class
# proportions the same in the training and test sets, so the evaluation is not
# skewed by an unlucky draw of the minority class. random_state fixes the
# shuffle so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Vectorize text data
# The vocabulary is learned from the training messages only (fit_transform);
# the test messages are then encoded with that same fitted vocabulary
# (transform), so nothing from the test set influences the features.
vectorizer = CountVectorizer()
X_train_vec, X_test_vec = (
    vectorizer.fit_transform(X_train),  # evaluated first: fits the vocabulary
    vectorizer.transform(X_test),       # evaluated second: reuses it
)

In [None]:
# Train model
# Fit a multinomial Naive Bayes classifier on the training count matrix.
# fit() returns the estimator itself, so it can be bound in one statement.
model = MultinomialNB().fit(X_train_vec, y_train)
model  # last expression: show the fitted estimator as the cell output

In [None]:
# Predictions
y_pred = model.predict(X_test_vec)

# Pin the label order (0 = ham, 1 = spam) so the report rows carry readable
# names and the confusion matrix always has the same row/column order.
report = classification_report(
    y_test, y_pred, labels=[0, 1], target_names=["ham", "spam"]
)
# Rows are the actual classes, columns the predicted classes, both ordered
# ham, spam.
matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

print("Accuracy:", accuracy_score(y_test, y_pred))
# Each section is printed as ONE string: passing the report/matrix as a second
# print() argument would insert a separator space right after the "\n",
# shifting the first line and misaligning the output.
print(f"\nClassification Report:\n{report}")
print(f"\nConfusion Matrix:\n{matrix}")

In [None]:
# Test with custom message
# The vectorizer expects an iterable of documents, so the single message is
# wrapped in a list.
sample_msg = [
    "Congratulations! You won a free lottery ticket, claim now!",
]

# Encode the message with the already-fitted vocabulary, then classify it.
sample_vec = vectorizer.transform(sample_msg)
sample_pred = model.predict(sample_vec)
print("Prediction (1=spam, 0=ham):", sample_pred)