In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Sample data
emails = [
    "Win a free iPhone now",
    "Meeting at 11 am tomorrow",
    "Congratulations you won lottery",
    "Project discussion with team",
    "Claim your prize immediately",
    "Please find the attached report",
    "Limited offer buy now",
    "Urgent offer expires today",
    "Schedule the meeting for Monday",
    "You have won a cash prize",
    "Monthly performance report attached",
    "Exclusive deal just for you",
]

labels = [1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1]

# Feature extraction
vectorizer = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    ngram_range=(1, 2),
    max_df=0.9,
    min_df=1
)

X = vectorizer.fit_transform(emails)
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.25, random_state=42, stratify=labels
)

# Model Training
svm_model = LinearSVC(C=1.0)
svm_model.fit(X_train, y_train)

# Evaluation
y_pred = svm_model.predict(X_test)
print("Improved Model Accuracy:", accuracy_score(y_test, y_pred))

# Prediction on new input
new_email = input("\nEnter a new email message: ")
new_email_vector = vectorizer.transform([new_email])
prediction = svm_model.predict(new_email_vector)

if prediction[0] == 1:
    print("Result: Spam Email")
else:
    print("Result: Not Spam Email")

Improved Model Accuracy: 1.0



Enter a new email message:  velop their data skills are poised to enhance their professional prospects in increasingly data-centric organizations, industries, and jobs


Result: Spam Email
