In [11]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# --- Load the dataset and encode the labels ---------------------------------
# Only columns 'v1' (class name) and 'v2' (message text) are used downstream,
# so they are the only ones parsed. The file is decoded as latin1.
DATA_PATH = "/content/spam.csv"
LABEL_TO_ID = {'ham': 0, 'spam': 1}

df = (
    pd.read_csv(DATA_PATH, encoding="latin1", usecols=['v1', 'v2'])
    [['v1', 'v2']]  # usecols keeps file order; pin the order explicitly
    .rename(columns={'v1': 'label', 'v2': 'text'})
)

# Series.map() yields NaN for every value that is missing from the dict, so a
# stray label (different case, surrounding whitespace, empty cell, ...) would
# otherwise slip through silently and only fail much later during model.fit().
encoded_labels = df['label'].map(LABEL_TO_ID)
if encoded_labels.isna().any():
    unexpected = sorted(df.loc[encoded_labels.isna(), 'label'].astype(str).unique())
    raise ValueError(
        f"Unexpected label values in {DATA_PATH}: {unexpected}; "
        f"expected one of {sorted(LABEL_TO_ID)}"
    )

# 0 = ham, 1 = spam; stored as plain int64 now that no NaN can be present.
df = df.assign(label=encoded_labels.astype('int64'))


# --- Hold out 20% of the messages for evaluation (fixed seed) ---------------
texts, labels = df['text'], df['label']
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# --- TF-IDF features ---------------------------------------------------------
# The vectorizer is fitted on the training messages only; the held-out
# messages are transformed with that already-fitted vectorizer.
tfidf = TfidfVectorizer(stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# --- Fit the classifier and predict the held-out set ------------------------
model = LogisticRegression(max_iter=2000).fit(X_train_tfidf, y_train)
pred = model.predict(X_test_tfidf)

# --- Report ------------------------------------------------------------------
# Each entry is (heading, metric value); printed as "heading value".
report_sections = [
    ("\nAccuracy:", accuracy_score(y_test, pred)),
    ("\nConfusion Matrix:\n", confusion_matrix(y_test, pred)),
    ("\nClassification Report:\n", classification_report(y_test, pred)),
]
for heading, metric in report_sections:
    print(heading, metric)


# --- Try the trained model on a few hand-written messages -------------------
tests = [
    "Congratulations! You won a free lottery ticket!",
    "Meeting at 5 PM today?",
    "Get FREE recharge now!",
    "Please submit your assignment",
    "You won 50,000 cash prize!"
]

# Vectorize and classify every message in one call instead of once per message.
# The results get their own names so the test-set predictions held in `pred`
# are not overwritten by this demo loop.
sample_preds = model.predict(tfidf.transform(tests))

for msg, sample_pred in zip(tests, sample_preds):
    # Class 1 is spam, class 0 is ham (see the label encoding above in this cell).
    result = "SPAM" if sample_pred == 1 else "NOT SPAM"
    print(f"Message: {msg}\nPrediction: {result}\n")



Accuracy: 0.9524663677130045

Confusion Matrix:
 [[962   3]
 [ 50 100]]

Classification Report:
               precision    recall  f1-score   support

           0       0.95      1.00      0.97       965
           1       0.97      0.67      0.79       150

    accuracy                           0.95      1115
   macro avg       0.96      0.83      0.88      1115
weighted avg       0.95      0.95      0.95      1115

Message: Congratulations! You won a free lottery ticket!
Prediction: SPAM

Message: Meeting at 5 PM today?
Prediction: NOT SPAM

Message: Get FREE recharge now!
Prediction: NOT SPAM

Message: Please submit your assignment
Prediction: NOT SPAM

Message: You won 50,000 cash prize!
Prediction: SPAM

