# In [None]:
# Name: Email Spam Classifier
"""This project builds a classification model to predict whether an email is spam or not based on the words it contains.
Algorithm Used: Multinomial Naive Bayes on bag-of-words counts (CountVectorizer)
Dataset: Small custom dataset of email texts with a spam label (0 = Not Spam, 1 = Spam)
Goal: Demonstrate how ML models can help filter unwanted email by identifying likely spam messages.
This project showcases the practical use of Machine Learning in text classification.
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Step 1: Create small dataset
# Each text in "Email" is paired by position with its label in "Spam".
data = {
    "Email": [
        "Win a lottery now!!!",
        "Get free money instantly",
        "Hello friend, how are you?",
        "Meeting scheduled for tomorrow",
        "Claim your free prize",
        "Let’s go for lunch today",
        "You won a jackpot!!!",
        "Reminder for project submission"
    ],
    "Spam": [1, 1, 0, 0, 1, 0, 1, 0]  # 1 = Spam, 0 = Not Spam
}

df = pd.DataFrame(data)
y = df["Spam"]

# Step 2: Train-Test Split
# Split the raw text BEFORE vectorizing. Fitting the vectorizer on every
# email would let words that only occur in the test emails shape the
# training features (data leakage) and make the evaluation optimistic.
emails_train, emails_test, y_train, y_test = train_test_split(
    df["Email"], y, test_size=0.3, random_state=42
)

# Step 3: Text Vectorization
# Learn the vocabulary from the training emails only, then reuse that same
# vocabulary to encode the held-out emails.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(emails_train)
X_test = vectorizer.transform(emails_test)

# Count matrix for the full dataset, encoded with the training vocabulary.
# Kept so that `X` remains available under its original name.
X = vectorizer.transform(df["Email"])

# Step 4: Train Naive Bayes Model
model = MultinomialNB()
model.fit(X_train, y_train)

# Step 5: Predictions
y_pred = model.predict(X_test)

# Step 6: Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
# Passing labels=[0, 1] keeps the matrix 2x2 (rows = true class, columns =
# predicted class) even when the small test split contains only one class.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=[0, 1]))

# Try with new email
# The fitted vectorizer must be reused here; words it did not see during
# fitting are ignored by transform().
new_email = ["Congratulations, you have won a free trip!"]
new_email_vector = vectorizer.transform(new_email)
print("Prediction (1=Spam, 0=Not Spam):", model.predict(new_email_vector))
