In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline

# Load the CSV file (decoded as latin-1) and keep only the two columns used
# below, renamed from v1/v2 to label/message. Labels are encoded as integers:
# ham -> 0, spam -> 1.
label_codes = {'ham': 0, 'spam': 1}
data = (
    pd.read_csv("spam.csv", encoding='latin-1')
    .rename(columns={'v1': 'label', 'v2': 'message'})
    .loc[:, ['label', 'message']]
    .assign(label=lambda frame: frame['label'].map(label_codes))
)

# Series.map turns every value missing from the mapping into NaN. Fail here
# with a clear message instead of letting NaN labels reach the classifier.
unmapped_labels = data['label'].isna()
if unmapped_labels.any():
    raise ValueError(
        f"{int(unmapped_labels.sum())} row(s) in spam.csv have a label other than 'ham' or 'spam'"
    )

# Hold out 20% of the messages as a test set; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data['message'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

# Pipeline: TF-IDF features (English stop words removed) feeding a
# linear-kernel SVM. probability=True is required because predict_proba is
# called on the fitted model later.
model = make_pipeline(
    TfidfVectorizer(stop_words='english'),
    SVC(kernel='linear', probability=True),
)

# Fit the vectorizer and the classifier on the training portion only.
model.fit(X_train, y_train)

# Function to predict and get probability
def predict_message(message):
    """Classify a single SMS message with the fitted module-level ``model``.

    Parameters
    ----------
    message : str
        Raw message text; it is wrapped in a one-element list because the
        pipeline expects an iterable of documents.

    Returns
    -------
    tuple
        ``(label, probability)`` where ``label`` is ``'spam'`` when the model
        predicts class 1 and ``'not spam'`` otherwise, and ``probability`` is
        the ``predict_proba`` estimate for that predicted class.

    Notes
    -----
    The SVC probability estimates come from a separate calibration step, so
    the reported probability can occasionally be below 0.5 even though
    ``predict`` chose that class.
    """
    prediction = model.predict([message])[0]
    class_probabilities = model.predict_proba([message])[0]

    # predict_proba columns follow the order of model.classes_, so look up the
    # column position of the predicted class instead of assuming the class
    # label itself is a valid column index.
    class_position = list(model.classes_).index(prediction)
    probability = class_probabilities[class_position]

    label = 'spam' if prediction == 1 else 'not spam'
    return label, probability

In [None]:
# Ask for one message and report the predicted class with its probability.
# Surrounding whitespace is dropped; a blank entry is not classified because
# it carries no text for the model to score.
user_input = input("Enter an SMS message: ").strip()
if user_input:
    label, probability = predict_message(user_input)
    print(f"The message is classified as '{label}' with a probability of {probability:.2f}")
else:
    print("No message entered; nothing to classify.")

Enter an SMS message: Congratulations! You've been selected to receive a free $1000 gift card! Just click here to claim your prize: www.freegiftcard.com
The message is classified as 'spam' with a probability of 1.00
