In [65]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [66]:
# Toy corpus written as (label, text) records so every label sits next to the
# message it belongs to.
labelled_messages = [
    ("ham", "Hey are we meeting today"),
    ("ham", "Please send me the notes"),
    ("spam", "Win cash prize now"),
    ("spam", "Congratulations you have won a lottery"),
    ("ham", "Are you coming to class"),
    ("ham", "Let's go for lunch"),
    ("spam", "Limited offer buy now"),
    ("ham", "Call me when you are free"),
    ("spam", "Urgent claim your prize"),
    ("spam", "Free entry in contest"),
    ("ham", "How was your exam"),
    ("ham", "See you tomorrow"),
    ("spam", "Your account has been suspended"),
    ("ham", "Can you help me with assignment"),
    ("spam", "You won a free mobile phone"),
    ("ham", "Meeting postponed to tomorrow"),
]

# Column-oriented view of the records: one list of labels, one list of texts,
# both in record order.
data = {
    "label": [label for label, _ in labelled_messages],
    "text": [text for _, text in labelled_messages],
}

df = pd.DataFrame(data)

print("Dataset Created Successfully!\n")
print(df.head())

Dataset Created Successfully!

  label                                    text
0   ham                Hey are we meeting today
1   ham                Please send me the notes
2  spam                      Win cash prize now
3  spam  Congratulations you have won a lottery
4   ham                 Are you coming to class


In [67]:
# Numeric target for the classifier: ham -> 0, spam -> 1.
label_codes = {'ham': 0, 'spam': 1}
df['label_num'] = df['label'].map(label_codes)

In [68]:
# Features are the raw message strings; the target is the numeric label column.
X, y = df['text'], df['label_num']

# Hold out a quarter of the rows for evaluation, with a fixed seed.
# NOTE(review): no `stratify` argument is passed, so the ham/spam mix of the
# test set is whatever this seed happens to produce.
split_parts = train_test_split(X, y, random_state=42, test_size=0.25)
X_train, X_test, y_train, y_test = split_parts

In [69]:
# TF-IDF features: English stop words are dropped and the vocabulary is capped
# at 2000 terms.
tfidf = TfidfVectorizer(max_features=2000, stop_words='english')

# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that already-fitted vocabulary.
X_train_vec = tfidf.fit_transform(X_train)
X_test_vec = tfidf.transform(X_test)

# Train a logistic regression classifier (default settings) on the
# vectorized training messages.
model = LogisticRegression().fit(X_train_vec, y_train)

print("\nModel Trained Successfully!")


Model Trained Successfully!


In [70]:
# Predicted class numbers for the held-out messages.
y_pred = model.predict(X_test_vec)

# Compute each metric against the true test labels, then report them.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("\nAccuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


Accuracy: 0.5

Confusion Matrix:
 [[1 2]
 [0 1]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.33      1.00      0.50         1

    accuracy                           0.50         4
   macro avg       0.67      0.67      0.50         4
weighted avg       0.83      0.50      0.50         4



In [71]:
def predict_with_details(message: str, vectorizer=None, classifier=None) -> None:
    """Classify one message and print the values behind the decision.

    The message is vectorized and scored, then the linear score, the
    probability of the spam class, the predicted class number and a readable
    label are printed, followed by a 40-character dashed separator.

    Args:
        message: Raw text of the message to classify.
        vectorizer: Fitted object with a ``transform`` method. When omitted,
            the notebook-level ``tfidf`` is used.
        classifier: Fitted object exposing ``decision_function``,
            ``predict_proba``, ``predict`` and ``classes_``. When omitted,
            the notebook-level ``model`` is used.

    Returns:
        None. All results are reported via ``print``.

    Raises:
        ValueError: If class ``1`` (spam) is not among ``classifier.classes_``.
    """
    # Fall back to the notebook globals so existing one-argument calls behave
    # exactly as before; passing explicit objects removes the hidden dependency.
    if vectorizer is None:
        vectorizer = tfidf
    if classifier is None:
        classifier = model

    # transform expects an iterable of documents, hence the one-element list.
    msg_vec = vectorizer.transform([message])

    linear_score = classifier.decision_function(msg_vec)[0]

    # Locate the spam column (class 1) via classes_ instead of assuming it is
    # always at position 1 of the predict_proba output.
    spam_column = list(classifier.classes_).index(1)
    probability = classifier.predict_proba(msg_vec)[0][spam_column]

    prediction_num = classifier.predict(msg_vec)[0]
    prediction_label = "Spam" if prediction_num == 1 else "Not Spam"

    print("Message:", message)
    print("Linear Score:", linear_score)
    print("Probability of Spam:", probability)
    print("Prediction Number:", prediction_num)
    print("Prediction Label:", prediction_label)
    print("-" * 40)

In [72]:
print("\n----- SAMPLE PREDICTIONS -----\n")

# Run the detailed prediction report on two example messages, in order.
sample_messages = (
    "You have won a lottery",
    "Please attend the meeting tomorrow",
)
for sample in sample_messages:
    predict_with_details(sample)


----- SAMPLE PREDICTIONS -----

Message: You have won a lottery
Linear Score: 0.3398290997279493
Probability of Spam: 0.584149008618854
Prediction Number: 1
Prediction Label: Spam
----------------------------------------
Message: Please attend the meeting tomorrow
Linear Score: -0.5173840816466614
Probability of Spam: 0.3734641260600415
Prediction Number: 0
Prediction Label: Not Spam
----------------------------------------
