In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

In [20]:
# Load the CSV file (decoded as latin-1)
data = pd.read_csv('spam.csv', encoding='latin-1')

# Preprocess the data: keep only the label/message columns under readable
# names and encode the label as an integer (ham -> 0, spam -> 1).
# Building the frame in one chain with .assign() avoids assigning into a
# column subset, which is the pattern that can raise pandas'
# SettingWithCopyWarning.
label_codes = {'ham': 0, 'spam': 1}
data = (
    data
    .rename(columns={'v1': 'label', 'v2': 'message'})
    .loc[:, ['label', 'message']]
    .assign(label=lambda frame: frame['label'].map(label_codes))
)

# .map() turns every value that is not a key of label_codes into NaN without
# complaining; stop here with a clear message instead of passing NaN targets
# on to the later cells.
unmapped_count = int(data['label'].isna().sum())
if unmapped_count:
    raise ValueError(
        f"{unmapped_count} row(s) in spam.csv have a label other than 'ham' or 'spam'"
    )

In [21]:
# Hold out 20% of the messages for evaluation; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data['message'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

In [22]:
# Two-step pipeline: raw text -> TF-IDF features -> logistic regression.
model = make_pipeline(
    # English stop words are dropped before the features are computed
    TfidfVectorizer(stop_words='english'),
    # Allow the solver up to 1000 iterations
    LogisticRegression(max_iter=1000),
)

In [23]:
# Fit the whole pipeline (vectorizer + classifier) on the training split
model.fit(X_train, y_train)

# Predict the held-out messages and print per-class precision / recall / F1
y_pred = model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97       965
           1       0.97      0.67      0.79       150

    accuracy                           0.95      1115
   macro avg       0.96      0.83      0.88      1115
weighted avg       0.95      0.95      0.95      1115



In [25]:
def predict_message(message):
    """Classify a single SMS message with the fitted ``model`` pipeline.

    Parameters
    ----------
    message : str
        Raw text of the message to classify.

    Returns
    -------
    tuple
        ``(label, probability)`` where ``label`` is ``'spam'`` when the
        predicted class is ``1`` and ``'not spam'`` otherwise, and
        ``probability`` is the model's probability for the predicted class
        as a plain ``float``.
    """
    # A single predict_proba pass yields both the decision and its confidence,
    # so the message is only run through the pipeline once.
    probabilities = model.predict_proba([message])[0]

    # Position of the highest probability (the first one wins on a tie).
    best = max(range(len(probabilities)), key=lambda i: probabilities[i])

    # predict_proba columns are ordered like model.classes_, so translate the
    # column position back to the class label instead of assuming that the
    # label value and the column position are the same number.
    prediction = model.classes_[best]
    probability = float(probabilities[best])

    label = 'spam' if prediction == 1 else 'not spam'
    return label, probability

# Take dynamic input from the user until they type 'exit'.
# NOTE: input() blocks, so this cell waits for the user when the notebook is
# executed with "Run All".
while True:
    try:
        user_input = input("Enter an SMS message (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or execution interrupted: leave the loop
        # quietly instead of ending the cell with a traceback.
        break

    # Ignore surrounding whitespace so that e.g. 'exit ' still quits.
    user_input = user_input.strip()
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing to classify; prompt again.
        continue

    label, probability = predict_message(user_input)
    print(f"The message is classified as '{label}' with a probability of {probability:.2f}\n")

Enter an SMS message (or type 'exit' to quit): Oh k...i'm watching here:)
The message is classified as 'not spam' with a probability of 0.97

Enter an SMS message (or type 'exit' to quit): As a valued customer, I am pleased to advise you that following recent review of your Mob No. you are awarded with a 螢1500 Bonus Prize, call 09066364589
The message is classified as 'spam' with a probability of 0.56

Enter an SMS message (or type 'exit' to quit): exit
